1- {
2- "llama31_405b_ref_1008" :
3- {
4- "Benchmark" : " llama31_405b" ,
5- "Creator" : " NVIDIA" ,
6- "When" : " Reference RCPs before 5.0 submission" ,
7- "Platform" : " 288xDGX-H100" ,
8- "Precision" : " BF16" ,
9- "BS" : 1008 ,
10- "Hyperparams" : {
11- "opt_base_learning_rate" : 7e-05 ,
12- "opt_learning_rate_warmup_steps" : 9143 ,
13- "gradient_accumulation_steps" : 126
14- },
15- "Epochs to converge" : [
16- 324576 ,324576 ,324576 ,
17- 324576 ,324576 ,324576
18- ]
19- },
1+ {
202 "llama31_405b_ref_1152" :
213 {
224 "Benchmark" : " llama31_405b" ,
235 "Creator" : " NVIDIA" ,
24- "When" : " Reference RCPs before 5.0 submission" ,
6+ "When" : " Reference RCPs after 5.0 submission" ,
257 "Platform" : " 288xDGX-H100" ,
268 "Precision" : " BF16" ,
279 "BS" : 1152 ,
3113 "gradient_accumulation_steps" : 144
3214 },
3315 "Epochs to converge" : [
34- 322560 , 322560 , 322560 ,
35- 322560 , 322560 , 322560
16+ 313344 , 313344 , 313344 ,
17+ 331776 , 313344 , 294912
3618 ]
3719 },
3820
3921 "llama31_405b_ref_2304" :
4022 {
4123 "Benchmark" : " llama31_405b" ,
4224 "Creator" : " NVIDIA" ,
43- "When" : " Reference RCPs before 5.0 submission" ,
25+ "When" : " Reference RCPs after 5.0 submission" ,
4426 "Platform" : " 288xDGX-H100" ,
4527 "Precision" : " BF16" ,
4628 "BS" : 2304 ,
5032 "gradient_accumulation_steps" : 288
5133 },
5234 "Epochs to converge" : [
53- 368640 ,368640 , 368640 ,
54- 368640 ,414720 , 414720
35+ 368640 ,350208 , 387072 ,
36+ 368640 ,368640 , 368640
5537 ]
5638 },
39+
5740 "llama31_405b_ref_4608" :
5841 {
5942 "Benchmark" : " llama31_405b" ,
6043 "Creator" : " NVIDIA" ,
61- "When" : " Reference RCPs before 5.0 submission" ,
44+ "When" : " Reference RCPs after 5.0 submission" ,
6245 "Platform" : " 288xDGX-H100" ,
6346 "Precision" : " BF16" ,
6447 "BS" : 4608 ,
6851 "gradient_accumulation_steps" : 576
6952 },
7053 "Epochs to converge" : [
71- 460800 ,460800 ,506880 ,
72- 506880 ,506880 ,506880
73- ]
74- },
75- "llama31_405b_ref_6912" :
76- {
77- "Benchmark" : " llama31_405b" ,
78- "Creator" : " NVIDIA" ,
79- "When" : " Reference RCPs before 5.0 submission" ,
80- "Platform" : " 72xDGX-H100" ,
81- "Precision" : " BF16" ,
82- "BS" : 6912 ,
83- "Hyperparams" : {
84- "opt_base_learning_rate" : 48e-05 ,
85- "opt_learning_rate_warmup_steps" : 1334 ,
86- "gradient_accumulation_steps" : 3456
87- },
88- "Epochs to converge" : [
89- 580608 ,580608 ,580608 ,
90- 628992 ,628992 ,628992
91- ]
92- },
93- "llama31_405b_ref_9216" :
94- {
95- "Benchmark" : " llama31_405b" ,
96- "Creator" : " NVIDIA" ,
97- "When" : " Reference RCPs before 5.0 submission" ,
98- "Platform" : " 288xDGX-H100" ,
99- "Precision" : " BF16" ,
100- "BS" : 9216 ,
101- "Hyperparams" : {
102- "opt_base_learning_rate" : 64e-05 ,
103- "opt_learning_rate_warmup_steps" : 1000 ,
104- "gradient_accumulation_steps" : 1152
105- },
106- "Epochs to converge" : [
107- 645120 ,645120 ,691200 ,
108- 691200 ,737280 ,737280
54+ 497664 ,497664 ,460800 ,
55+ 497664 ,479232 ,497664
10956 ]
11057 }
11158 }
112-
59+
60+
0 commit comments