@@ -115,6 +115,8 @@ jobs:
115115 build-and-test-llama3 :
116116 name : Build and test LLama3.2 1B
117117 runs-on : ubicloud-gpu-standard-1-latest
118+ container :
119+ image : nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
118120 env :
119122 HF_TOKEN : ${{ secrets.HF_TOKEN }}  # SECURITY: never commit a literal hf_... token — the old one is in git history and must be revoked/rotated; ::add-mask:: only hides it in logs
120122 steps :
@@ -123,21 +125,21 @@ jobs:
123125 - run : echo "::add-mask::$HF_TOKEN"
124126
125127 - name : Install OpenMP
126- run : sudo apt-get update && sudo apt-get install -y libomp-dev
128+ run : apt-get update && apt-get install -y libomp-dev libopenmpi-dev python3-pip
127129
128130 - name : Install dependencies
129131 run : pip install -r requirements.txt
130132
131133 - name : Run preprocessing
132- run : python dev/data/tinyshakespeare.py --model_desc llama-3
134+ run : python3 dev/data/tinyshakespeare.py --model_desc llama-3
133135
134136 - name : Train model
135137 # use the first 10 layers, so that everything fits into the 20GB of
136138 # the A4000 Ada that we have in CI
137- run : python train_llama3.py --write_tensors 1 --dtype float32 --depth 10
139+ run : python3 train_llama3.py --write_tensors 1 --dtype float32 --depth 10
138140
139141 - name : Build FP32 precision
140- run : PRECISION=FP32 make test_llama3cu
142+ run : PRECISION=FP32 NO_MULTI_GPU=1 make test_llama3cu
141143
142144 - name : Run default
143145 run : ./test_llama3cu
@@ -149,7 +151,7 @@ jobs:
149151 run : ./test_llama3cu -r 2
150152
151153 - name : Build BF16 precision
152- run : PRECISION=BF16 make train_llama3cu test_llama3cu
154+ run : PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
153155
154156 - name : Run default (BF16)
155157 run : ./test_llama3cu
@@ -174,7 +176,7 @@ jobs:
174176 - run : echo "::add-mask::$HF_TOKEN"
175177
176178 - name : Install OpenMP
177- run : sudo apt-get update && sudo apt-get install -y libomp-dev
179+ run : sudo apt-get update && sudo apt-get install -y libomp-dev git
178180
179181 - name : Install dependencies
180182 run : pip install -r requirements.txt
@@ -202,7 +204,7 @@ jobs:
202204 git clone https://github.com/NVIDIA/cudnn-frontend.git
203205
204206 - name : Build with cuDNN
205- run : USE_CUDNN=1 PRECISION=BF16 make train_llama3cu test_llama3cu
207+ run : USE_CUDNN=1 PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
206208
207209 - name : Train model with cuDNN
208210 run : ./train_llama3cu