@@ -115,29 +115,29 @@ jobs:
115
115
build-and-test-llama3 :
116
116
name : Build and test LLama3.2 1B
117
117
runs-on : ubicloud-gpu-standard-1-latest
118
- env :
119
- HF_TOKEN : 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
118
+ container :
119
+ image : nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
120
120
steps :
121
121
- name : Checkout code
122
122
uses : actions/checkout@v4
123
- - run : echo "::add-mask::$HF_TOKEN "
123
+ - run : echo "::add-mask::$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m') "
124
124
125
125
- name : Install OpenMP
126
- run : sudo apt-get update && sudo apt-get install -y libomp-dev
126
+ run : apt-get update && apt-get install -y libomp-dev libopenmpi-dev python3-pip
127
127
128
128
- name : Install dependencies
129
129
run : pip install -r requirements.txt
130
130
131
131
- name : Run preprocessing
132
- run : python dev/data/tinyshakespeare.py --model_desc llama-3
132
+ run : HF_TOKEN=$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m') python3 dev/data/tinyshakespeare.py --model_desc llama-3
133
133
134
134
- name : Train model
135
135
# use the first 10 layers, so that everything fits into the 20GB of
136
136
# the A4000 Ada that we have in CI
137
- run : python train_llama3.py --write_tensors 1 --dtype float32 --depth 10
137
+ run : HF_TOKEN=$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m') python3 train_llama3.py --write_tensors 1 --dtype float32 --depth 10
138
138
139
139
- name : Build FP32 precision
140
- run : PRECISION=FP32 make test_llama3cu
140
+ run : PRECISION=FP32 NO_MULTI_GPU=1 make test_llama3cu
141
141
142
142
- name : Run default
143
143
run : ./test_llama3cu
@@ -149,7 +149,7 @@ jobs:
149
149
run : ./test_llama3cu -r 2
150
150
151
151
- name : Build BF16 precision
152
- run : PRECISION=BF16 make train_llama3cu test_llama3cu
152
+ run : PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
153
153
154
154
- name : Run default (BF16)
155
155
run : ./test_llama3cu
@@ -166,15 +166,12 @@ jobs:
166
166
build-and-test-llama3-untied :
167
167
name : Build and test LLama3.2 1B with untie weights
168
168
runs-on : ubicloud-gpu-standard-1-latest
169
- env :
170
- HF_TOKEN : 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
171
169
steps :
172
170
- name : Checkout code
173
171
uses : actions/checkout@v4
174
- - run : echo "::add-mask::$HF_TOKEN"
175
172
176
173
- name : Install OpenMP
177
- run : sudo apt-get update && sudo apt-get install -y libomp-dev
174
+ run : sudo apt-get update && sudo apt-get install -y libomp-dev git
178
175
179
176
- name : Install dependencies
180
177
run : pip install -r requirements.txt
@@ -202,7 +199,7 @@ jobs:
202
199
git clone https://github.com/NVIDIA/cudnn-frontend.git
203
200
204
201
- name : Build with cuDNN
205
- run : USE_CUDNN=1 PRECISION=BF16 make train_llama3cu test_llama3cu
202
+ run : USE_CUDNN=1 PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu
206
203
207
204
- name : Train model with cuDNN
208
205
run : ./train_llama3cu
0 commit comments