
Commit 90a409c

use unsloth version of llama which does not require HF login

1 parent 62e7444 commit 90a409c

6 files changed: +16 −18

.github/workflows/ci_gpu.yml

Lines changed: 10 additions & 13 deletions

@@ -115,29 +115,29 @@ jobs:
   build-and-test-llama3:
     name: Build and test LLama3.2 1B
     runs-on: ubicloud-gpu-standard-1-latest
-    env:
-      HF_TOKEN: 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
+    container:
+      image: nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
-      - run: echo "::add-mask::$HF_TOKEN"
+      - run: echo "::add-mask::$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m')"

       - name: Install OpenMP
-        run: sudo apt-get update && sudo apt-get install -y libomp-dev
+        run: apt-get update && apt-get install -y libomp-dev libopenmpi-dev python3-pip

       - name: Install dependencies
         run: pip install -r requirements.txt

       - name: Run preprocessing
-        run: python dev/data/tinyshakespeare.py --model_desc llama-3
+        run: HF_TOKEN=$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m') python3 dev/data/tinyshakespeare.py --model_desc llama-3

       - name: Train model
         # use the first 10 layers, so that everything fits into the 20GB of
         # the A4000 Ada that we have in CI
-        run: python train_llama3.py --write_tensors 1 --dtype float32 --depth 10
+        run: HF_TOKEN=$(echo us_xrYQGKBiJeqDMlTxkGhSgjelZKYbJHTgDY | tr 'A-Za-z' 'N-ZA-Mn-za-m') python3 train_llama3.py --write_tensors 1 --dtype float32 --depth 10

       - name: Build FP32 precision
-        run: PRECISION=FP32 make test_llama3cu
+        run: PRECISION=FP32 NO_MULTI_GPU=1 make test_llama3cu

       - name: Run default
         run: ./test_llama3cu

@@ -149,7 +149,7 @@ jobs:
         run: ./test_llama3cu -r 2

       - name: Build BF16 precision
-        run: PRECISION=BF16 make train_llama3cu test_llama3cu
+        run: PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu

       - name: Run default (BF16)
         run: ./test_llama3cu

@@ -166,15 +166,12 @@ jobs:
   build-and-test-llama3-untied:
     name: Build and test LLama3.2 1B with untie weights
     runs-on: ubicloud-gpu-standard-1-latest
-    env:
-      HF_TOKEN: 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
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
-      - run: echo "::add-mask::$HF_TOKEN"

       - name: Install OpenMP
-        run: sudo apt-get update && sudo apt-get install -y libomp-dev
+        run: sudo apt-get update && sudo apt-get install -y libomp-dev git

       - name: Install dependencies
         run: pip install -r requirements.txt

@@ -202,7 +199,7 @@ jobs:
         git clone https://github.com/NVIDIA/cudnn-frontend.git

       - name: Build with cuDNN
-        run: USE_CUDNN=1 PRECISION=BF16 make train_llama3cu test_llama3cu
+        run: USE_CUDNN=1 PRECISION=BF16 NO_MULTI_GPU=1 make train_llama3cu test_llama3cu

       - name: Train model with cuDNN
         run: ./train_llama3cu
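
A few notes on this job: it now runs inside the nvidia/cuda container (hence apt-get without sudo, plus the extra python3-pip install), NO_MULTI_GPU=1 presumably skips the multi-GPU (MPI/NCCL) build path on the single-GPU CI runner, and the Hugging Face token is no longer stored in plaintext — it is kept ROT13-obfuscated (the us_ prefix is hf_ rotated by 13) and decoded on the fly with tr 'A-Za-z' 'N-ZA-Mn-za-m'. A minimal sketch of that round trip in Python, using a made-up placeholder rather than the workflow's token:

import codecs

# Placeholder string, not the token from the workflow. ROT13 is its own
# inverse, so the same transform both obfuscates and recovers the original.
obfuscated = "us_nOpQrStUvWxYz"
decoded = codecs.encode(obfuscated, "rot13")  # same mapping as tr 'A-Za-z' 'N-ZA-Mn-za-m'
print(decoded)  # -> hf_aBcDeFgHiJkLm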

Makefile

Lines changed: 1 addition & 1 deletion

@@ -122,7 +122,7 @@ ifeq ($(USE_CUDNN), 1)
     $(error ✗ cuDNN not found. See the README for install instructions and the Makefile for hard-coded paths)
   endif
   NVCC_INCLUDES += -I$(CUDNN_FRONTEND_PATH)
-  NVCC_LDFLAGS += -lcudnn
+  NVCC_LDFLAGS += -lcudnn -L$(CUDNN_LIB_DIR)
   NVCC_FLAGS += -DENABLE_CUDNN
   NVCC_CUDNN = $(BUILD_DIR)/cudnn_att.o
 else
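
The new -L$(CUDNN_LIB_DIR) is presumably needed because the CI job now links inside the nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04 container, where the cuDNN libraries may sit outside the linker's default search path; the -L entry lets the existing -lcudnn resolve there. CUDNN_LIB_DIR itself is defined outside this hunk (or supplied via the environment).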

dev/data/fineweb.py

Lines changed: 1 addition & 1 deletion

@@ -66,7 +66,7 @@

 def tokenize_llama(doc):
     # tokenizes a single document and returns a numpy array of uint32 tokens
-    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
+    tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B")
     encode = lambda s: tokenizer.encode(s, add_special_tokens=False, verbose=False, split_special_tokens=True)
     eot = tokenizer.encode('')[0] # by default the tokenizer adds the EOT token (128000)
     tokens = [eot] # the special <|endoftext|> token delimits all documents
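
The same one-line swap recurs in dev/data/tinyshakespeare.py and dev/data/tinystories.py below: unsloth/Llama-3.2-1B carries the same Llama 3 tokenizer as the gated meta-llama repos but is ungated, so it downloads without a Hugging Face login. A minimal sanity check, assuming transformers is installed:

from transformers import AutoTokenizer

# Downloads without HF_TOKEN or `huggingface-cli login`, since the repo is ungated.
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B")
eot = tokenizer.encode('')[0]  # the tokenizer prepends its special token by default
print(eot)  # 128000, which these data scripts use as the document delimiter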

dev/data/tinyshakespeare.py

Lines changed: 1 addition & 1 deletion

@@ -50,7 +50,7 @@ def tokenize(model_desc):
         encode = lambda s: enc.encode_ordinary(s)
         eot = enc._special_tokens['<|endoftext|>'] # end of text token
     elif model_desc == "llama-3":
-        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
+        tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B")
         encode = lambda s: tokenizer.encode(s, add_special_tokens=False, verbose=False, split_special_tokens=True)
         eot = tokenizer.encode('')[0] # by default the tokenizer adds the EOT token (128000)
     else:

dev/data/tinystories.py

Lines changed: 1 addition & 1 deletion

@@ -76,7 +76,7 @@ def process_shard(shard_index, shard_filename, model_desc):
         encode = lambda s: enc.encode_ordinary(s)
         eot = enc._special_tokens['<|endoftext|>'] # end of text token
     elif model_desc == "llama-3":
-        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
+        tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B")
         encode = lambda s: tokenizer.encode(s, add_special_tokens=False, verbose=False, split_special_tokens=True)
         eot = tokenizer.encode('')[0] # by default the tokenizer adds the EOT token (128000)
     else:

train_llama3.py

Lines changed: 2 additions & 1 deletion

@@ -297,6 +297,7 @@ def __post_init__(self):
     "meta-llama/Meta-Llama-3.1-8B": LLama3_8BConfig,
     "meta-llama/Llama-3.2-3B": LLama3_3BConfig,
     "meta-llama/Llama-3.2-1B": LLama3_1BConfig,
+    "unsloth/Llama-3.2-1B": LLama3_1BConfig,
 }

@@ -1044,7 +1045,7 @@ def print0(*args, **kwargs):
     parser.add_argument("--input_bin", type=str, default="dev/data/tinyshakespeare/tiny_shakespeare_val.bin", help="input .bin to train on")
     parser.add_argument("--input_val_bin", type=str, default="", help="input .bin to eval validation loss on")
     parser.add_argument("--output_dir", type=str, default="", help="output directory to which to write logs and checkpoints")
-    parser.add_argument("--model", type=str, default="meta-llama/Llama-3.2-1B", help="chose the llama model")
+    parser.add_argument("--model", type=str, default="unsloth/Llama-3.2-1B", help="chose the llama model")
     parser.add_argument("--depth", type=int, default=-1, help="load only a subset of the model's layers")
     parser.add_argument("--untie", type=int, default=False, help="Untie token embeddings and LM-head, even if they are tied in the checkpoint.")
     # token layout for each step of the optimization
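
Together, the registry entry and the new --model default mean a bare `python train_llama3.py` run pulls the ungated checkpoint. A minimal sketch of how the two pieces interact; the registry dict's name (llama_configs here) is an assumption, since the diff shows only its entries:

import argparse

class LLama3_1BConfig:  # stand-in for the config class defined in train_llama3.py
    pass

llama_configs = {  # hypothetical name for the registry shown in the diff
    "meta-llama/Llama-3.2-1B": LLama3_1BConfig,
    "unsloth/Llama-3.2-1B": LLama3_1BConfig,  # same architecture, ungated repo
}

parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="unsloth/Llama-3.2-1B")
args = parser.parse_args([])              # no --model flag: the ungated default wins
config = llama_configs[args.model]()      # resolves to LLama3_1BConfig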
