Skip to content

Commit ff57824

Browse files
authored
Merge pull request #424 from mmarcinkiewicz/patch-1
Create rcps_llama31_405b.json
2 parents b9d07eb + e5d7b4c commit ff57824

File tree

1 file changed

+13
-65
lines changed

1 file changed

+13
-65
lines changed
Lines changed: 13 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,9 @@
1-
{
2-
"llama31_405b_ref_1008":
3-
{
4-
"Benchmark": "llama31_405b",
5-
"Creator": "NVIDIA",
6-
"When": "Reference RCPs before 5.0 submission",
7-
"Platform": "288xDGX-H100",
8-
"Precision": "BF16",
9-
"BS": 1008,
10-
"Hyperparams": {
11-
"opt_base_learning_rate": 7e-05,
12-
"opt_learning_rate_warmup_steps": 9143,
13-
"gradient_accumulation_steps": 126
14-
},
15-
"Epochs to converge": [
16-
324576,324576,324576,
17-
324576,324576,324576
18-
]
19-
},
1+
{
202
"llama31_405b_ref_1152":
213
{
224
"Benchmark": "llama31_405b",
235
"Creator": "NVIDIA",
24-
"When": "Reference RCPs before 5.0 submission",
6+
"When": "Reference RCPs after 5.0 submission",
257
"Platform": "288xDGX-H100",
268
"Precision": "BF16",
279
"BS": 1152,
@@ -31,16 +13,16 @@
3113
"gradient_accumulation_steps": 144
3214
},
3315
"Epochs to converge": [
34-
322560,322560,322560,
35-
322560,322560,322560
16+
313344,313344,313344,
17+
331776,313344,294912
3618
]
3719
},
3820

3921
"llama31_405b_ref_2304":
4022
{
4123
"Benchmark": "llama31_405b",
4224
"Creator": "NVIDIA",
43-
"When": "Reference RCPs before 5.0 submission",
25+
"When": "Reference RCPs after 5.0 submission",
4426
"Platform": "288xDGX-H100",
4527
"Precision": "BF16",
4628
"BS": 2304,
@@ -50,15 +32,16 @@
5032
"gradient_accumulation_steps": 288
5133
},
5234
"Epochs to converge": [
53-
368640,368640,368640,
54-
368640,414720,414720
35+
368640,350208,387072,
36+
368640,368640,368640
5537
]
5638
},
39+
5740
"llama31_405b_ref_4608":
5841
{
5942
"Benchmark": "llama31_405b",
6043
"Creator": "NVIDIA",
61-
"When": "Reference RCPs before 5.0 submission",
44+
"When": "Reference RCPs after 5.0 submission",
6245
"Platform": "288xDGX-H100",
6346
"Precision": "BF16",
6447
"BS": 4608,
@@ -68,45 +51,10 @@
6851
"gradient_accumulation_steps": 576
6952
},
7053
"Epochs to converge": [
71-
460800,460800,506880,
72-
506880,506880,506880
73-
]
74-
},
75-
"llama31_405b_ref_6912":
76-
{
77-
"Benchmark": "llama31_405b",
78-
"Creator": "NVIDIA",
79-
"When": "Reference RCPs before 5.0 submission",
80-
"Platform": "72xDGX-H100",
81-
"Precision": "BF16",
82-
"BS": 6912,
83-
"Hyperparams": {
84-
"opt_base_learning_rate": 48e-05,
85-
"opt_learning_rate_warmup_steps": 1334,
86-
"gradient_accumulation_steps": 3456
87-
},
88-
"Epochs to converge": [
89-
580608,580608,580608,
90-
628992,628992,628992
91-
]
92-
},
93-
"llama31_405b_ref_9216":
94-
{
95-
"Benchmark": "llama31_405b",
96-
"Creator": "NVIDIA",
97-
"When": "Reference RCPs before 5.0 submission",
98-
"Platform": "288xDGX-H100",
99-
"Precision": "BF16",
100-
"BS": 9216,
101-
"Hyperparams": {
102-
"opt_base_learning_rate": 64e-05,
103-
"opt_learning_rate_warmup_steps": 1000,
104-
"gradient_accumulation_steps": 1152
105-
},
106-
"Epochs to converge": [
107-
645120,645120,691200,
108-
691200,737280,737280
54+
497664,497664,460800,
55+
497664,479232,497664
10956
]
11057
}
11158
}
112-
59+
60+

0 commit comments

Comments
 (0)