Skip to content

Commit c9cf667

Browse files
committed
test 12.6.3
1 parent 62e7444 commit c9cf667

File tree

2 files changed

+10
-8
lines changed

2 files changed

+10
-8
lines changed

.github/workflows/ci_gpu.yml

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,8 @@ jobs:
115115
build-and-test-llama3:
116116
name: Build and test LLama3.2 1B
117117
runs-on: ubicloud-gpu-standard-1-latest
118+
container:
119+
image: nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
118120
env:
119121
HF_TOKEN: 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
120122
steps:
@@ -123,21 +125,21 @@ jobs:
123125
- run: echo "::add-mask::$HF_TOKEN"
124126

125127
- name: Install OpenMP
126-
run: sudo apt-get update && sudo apt-get install -y libomp-dev
128+
run: apt-get update && apt-get install -y libomp-dev libopenmpi-dev python3-pip
127129

128130
- name: Install dependencies
129131
run: pip install -r requirements.txt
130132

131133
- name: Run preprocessing
132-
run: python dev/data/tinyshakespeare.py --model_desc llama-3
134+
run: python3 dev/data/tinyshakespeare.py --model_desc llama-3
133135

134136
- name: Train model
135137
# use the first 10 layers, so that everything fits into the 20GB of
136138
# the A4000 Ada that we have in CI
137-
run: python train_llama3.py --write_tensors 1 --dtype float32 --depth 10
139+
run: python3 train_llama3.py --write_tensors 1 --dtype float32 --depth 10
138140

139141
- name: Build FP32 precision
140-
run: PRECISION=FP32 make test_llama3cu
142+
run: PRECISION=FP32 NO_MULTI_GPU=1 make test_llama3cu
141143

142144
- name: Run default
143145
run: ./test_llama3cu
@@ -149,7 +151,7 @@ jobs:
149151
run: ./test_llama3cu -r 2
150152

151153
- name: Build BF16 precision
152-
run: PRECISION=BF16 make train_llama3cu test_llama3cu
154+
run: PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
153155

154156
- name: Run default (BF16)
155157
run: ./test_llama3cu
@@ -174,7 +176,7 @@ jobs:
174176
- run: echo "::add-mask::$HF_TOKEN"
175177

176178
- name: Install OpenMP
177-
run: sudo apt-get update && sudo apt-get install -y libomp-dev
179+
run: sudo apt-get update && sudo apt-get install -y libomp-dev git
178180

179181
- name: Install dependencies
180182
run: pip install -r requirements.txt
@@ -202,7 +204,7 @@ jobs:
202204
git clone https://github.com/NVIDIA/cudnn-frontend.git
203205

204206
- name: Build with cuDNN
205-
run: USE_CUDNN=1 PRECISION=BF16 make train_llama3cu test_llama3cu
207+
run: USE_CUDNN=1 PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
206208

207209
- name: Train model with cuDNN
208210
run: ./train_llama3cu

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ ifeq ($(USE_CUDNN), 1)
122122
$(error ✗ cuDNN not found. See the README for install instructions and the Makefile for hard-coded paths)
123123
endif
124124
NVCC_INCLUDES += -I$(CUDNN_FRONTEND_PATH)
125-
NVCC_LDFLAGS += -lcudnn
125+
NVCC_LDFLAGS += -lcudnn -L$(CUDNN_LIB_DIR)
126126
NVCC_FLAGS += -DENABLE_CUDNN
127127
NVCC_CUDNN = $(BUILD_DIR)/cudnn_att.o
128128
else

0 commit comments

Comments
 (0)