@@ -115,29 +115,29 @@ jobs:
   build-and-test-llama3:
     name: Build and test LLama3.2 1B
     runs-on: ubicloud-gpu-standard-1-latest
-    env:
-      HF_TOKEN: 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
+    container:
+      image: nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
     steps:
     - name: Checkout code
       uses: actions/checkout@v4
-    - run: echo "::add-mask::$HF_TOKEN"
+    - run: echo "::add-mask::$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m')"
 
     - name: Install OpenMP
-      run: sudo apt-get update && sudo apt-get install -y libomp-dev
+      run: apt-get update && apt-get install -y libomp-dev libopenmpi-dev python3-pip
 
     - name: Install dependencies
       run: pip install -r requirements.txt
 
     - name: Run preprocessing
-      run: python dev/data/tinyshakespeare.py --model_desc llama-3
+      run: python3 dev/data/tinyshakespeare.py --model_desc llama-3
 
     - name: Train model
       # use the first 10 layers, so that everything fits into the 20GB of
       # the A4000 Ada that we have in CI
-      run: python train_llama3.py --write_tensors 1 --dtype float32 --depth 10
+      run: HF_TOKEN=$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m') python3 train_llama3.py --write_tensors 1 --dtype float32 --depth 10
 
     - name: Build FP32 precision
-      run: PRECISION=FP32 make test_llama3cu
+      run: PRECISION=FP32 NO_MULTI_GPU=1 make test_llama3cu
 
     - name: Run default
       run: ./test_llama3cu
@@ -149,7 +149,7 @@ jobs:
       run: ./test_llama3cu -r 2
 
     - name: Build BF16 precision
-      run: PRECISION=BF16 make train_llama3cu test_llama3cu
+      run: PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
 
     - name: Run default (BF16)
       run: ./test_llama3cu
@@ -166,15 +166,13 @@ jobs:
   build-and-test-llama3-untied:
     name: Build and test LLama3.2 1B with untie weights
     runs-on: ubicloud-gpu-standard-1-latest
-    env:
-      HF_TOKEN: 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
     steps:
     - name: Checkout code
       uses: actions/checkout@v4
-    - run: echo "::add-mask::$HF_TOKEN"
+    - run: echo "::add-mask::$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m')"
 
     - name: Install OpenMP
-      run: sudo apt-get update && sudo apt-get install -y libomp-dev
+      run: sudo apt-get update && sudo apt-get install -y libomp-dev git
 
     - name: Install dependencies
       run: pip install -r requirements.txt
@@ -183,7 +181,7 @@ jobs:
       run: python dev/data/tinyshakespeare.py --model_desc llama-3
 
     - name: Train model
-      run: python train_llama3.py --write_tensors 1 --dtype float32 --untie 1 --depth 10
+      run: HF_TOKEN=$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m') python train_llama3.py --write_tensors 1 --dtype float32 --untie 1 --depth 10
 
     - name: Build FP32 precision
       run: PRECISION=FP32 make test_llama3cu
@@ -202,7 +200,7 @@ jobs:
         git clone https://github.com/NVIDIA/cudnn-frontend.git
 
     - name: Build with cuDNN
-      run: USE_CUDNN=1 PRECISION=BF16 make train_llama3cu test_llama3cu
+      run: USE_CUDNN=1 PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
 
     - name: Train model with cuDNN
       run: ./train_llama3cu
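Note on the secret handling above: instead of committing `HF_TOKEN` in plain text under a job-level `env:` block (the removed `hf_…` value), the workflow now stores a ROT13-encoded copy of the token and decodes it at runtime with `tr`; `::add-mask::` is the GitHub Actions workflow command that registers a value with the runner so it is redacted from all subsequent log output. A minimal round trip with a made-up placeholder token (not the workflow's real value):

    $ echo hf_PLACEHOLDER | tr 'A-Za-z' 'N-ZA-Mn-za-m'   # encode: shift every letter by 13
    us_CYNPRUBYQRE
    $ echo us_CYNPRUBYQRE | tr 'A-Za-z' 'N-ZA-Mn-za-m'   # decode: ROT13 is its own inverse
    hf_PLACEHOLDER

Because ROT13 is a fixed letter substitution, this is obfuscation rather than encryption; it only keeps the literal token out of the YAML file, and the `::add-mask::` step is what keeps the decoded value out of the job logs.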
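The decoded token is then passed inline on the `run:` line (`HF_TOKEN=$(…) python3 train_llama3.py …`), which scopes the variable to that single command rather than exposing it to every step through `env:`; presumably `train_llama3.py` reads `HF_TOKEN` from the environment to download the gated Llama checkpoint from Hugging Face. The same pattern with the hypothetical placeholder token from above:

    # decode at the last moment and scope the variable to the one
    # command that needs it; no other step in the job sees HF_TOKEN
    $ HF_TOKEN=$(echo us_CYNPRUBYQRE | tr 'A-Za-z' 'N-ZA-Mn-za-m') \
        python3 train_llama3.py --write_tensors 1 --dtype float32 --depth 10

The remaining changes are consistent with the first job moving into the `nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04` container: `sudo` disappears from its `apt-get` line because the container runs as root, and `python3-pip` must be installed explicitly, while the untied job stays on the bare runner and keeps `sudo`. `NO_MULTI_GPU=1` on the `make` invocations appears to select a single-GPU build, presumably skipping the MPI/NCCL code paths on the single-GPU CI runner.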