Skip to content

Commit 75c4e87

Browse files
authored
Merge pull request #427 from mmarcinkiewicz/patch-2
Update rcps_llama31_8b.json
2 parents b203cbc + 3031b23 commit 75c4e87

File tree

1 file changed

+84
-21
lines changed

1 file changed

+84
-21
lines changed
Lines changed: 84 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,88 @@
11
{
22

3-
"llama31_8b_ref_X":
4-
{
5-
"Benchmark": "",
6-
"Creator": "",
7-
"When": "",
8-
"Platform": "",
9-
"BS": 0,
10-
"Hyperparams": {
11-
"opt_base_learning_rate": 0,
12-
"opt_epsilon": 0,
13-
"opt_learning_rate_training_steps": 0,
14-
"num_warmup_steps": 0,
15-
"start_warmup_step": 0,
16-
"opt_lamb_beta_1": 0,
17-
"opt_lamb_beta_2": 0,
18-
"opt_lamb_weight_decay_rate": 0,
19-
"gradient_accumulation_steps": 0
3+
"llama31_8b_ref_32":
4+
{
5+
"Benchmark": "llama31_8b",
6+
"Creator": "NVIDIA",
7+
"When": "Reference RCPs before 5.1 submission",
8+
"Platform": "2xDGX-B200",
9+
"Precision": "BF16",
10+
"BS": 32,
11+
"Hyperparams": {
12+
"opt_base_learning_rate": 1e-03,
13+
"opt_learning_rate_warmup_samples": 16348,
14+
"gradient_accumulation_steps": 2
15+
},
16+
"Epochs to converge": [
17+
196608, 221184, 208896, 221184, 221184,
18+
208896, 208896, 196608, 233472, 208896,
19+
233472, 208896, 208896, 221184, 233472,
20+
196608, 208896, 233472, 221184, 208896
21+
]
2022
},
21-
"Epochs to converge": [
22-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
24-
}
23+
24+
"llama31_8b_ref_64":
25+
{
26+
"Benchmark": "llama31_8b",
27+
"Creator": "NVIDIA",
28+
"When": "Reference RCPs before 5.1 submission",
29+
"Platform": "2xDGX-B200",
30+
"Precision": "BF16",
31+
"BS": 64,
32+
"Hyperparams": {
33+
"opt_base_learning_rate": 1e-03,
34+
"opt_learning_rate_warmup_samples": 16348,
35+
"gradient_accumulation_steps": 4
36+
},
37+
"Epochs to converge": [
38+
233472, 221184, 233472, 221184, 221184,
39+
245760, 233472, 233472, 208896, 245760,
40+
233472, 221184, 233472, 233472, 221184,
41+
245760, 221184, 233472, 233472, 233472
42+
]
43+
},
44+
45+
"llama31_8b_ref_96":
46+
{
47+
"Benchmark": "llama31_8b",
48+
"Creator": "NVIDIA",
49+
"When": "Reference RCPs before 5.1 submission",
50+
"Platform": "2xDGX-B200",
51+
"Precision": "BF16",
52+
"BS": 96,
53+
"Hyperparams": {
54+
"opt_base_learning_rate": 1e-03,
55+
"opt_learning_rate_warmup_samples": 16348,
56+
"gradient_accumulation_steps": 6
57+
},
58+
"Epochs to converge": [
59+
297216, 284832, 272448, 272448, 272448,
60+
272448, 297216, 272448, 297216, 272448,
61+
297216, 260064, 272448, 272448, 272448,
62+
284832, 260064, 284832, 284832, 272448
63+
]
64+
},
65+
66+
"llama31_8b_ref_128":
67+
{
68+
"Benchmark": "llama31_8b",
69+
"Creator": "NVIDIA",
70+
"When": "Reference RCPs before 5.1 submission",
71+
"Platform": "4xDGX-B200",
72+
"Precision": "BF16",
73+
"BS": 128,
74+
"Hyperparams": {
75+
"opt_base_learning_rate": 2e-03,
76+
"opt_learning_rate_warmup_samples": 32768,
77+
"gradient_accumulation_steps": 4
78+
},
79+
"Epochs to converge": [
80+
368640, 344064, 356352, 344064, 368640,
81+
368640, 405504, 344064, 331776, 307200,
82+
331776, 380928, 307200, 344064, 319488,
83+
356352, 331776, 319488, 356352, 331776
84+
]
85+
}
2586
}
87+
88+

0 commit comments

Comments
 (0)