Skip to content

Commit 369260b

Browse files
authored
Merge pull request #374 from ShriyaPalsamudram/shriya/add_new_gpt3_rcps
Add GBS 6144 RCPs for GPT3 benchmark
2 parents b5cb220 + 02d3ba3 commit 369260b

File tree

1 file changed

+93
-0
lines changed

1 file changed

+93
-0
lines changed
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
3+
"gpt3_ref_1536":
4+
{
5+
"Benchmark": "gpt3",
6+
"Creator": "Google & NVIDIA",
7+
"When": "Prior to 3.0 submission",
8+
"Platform": "TPU-v4-1536 / PaxML, 1024 A100-80GB / Megatron-LM",
9+
"BS": 1536,
10+
"Hyperparams": {
11+
"opt_base_learning_rate": 2e-5
12+
},
13+
"Epochs to converge": [
14+
1157627904, 1157627904, 1157627904, 1258291200, 1207959552, 1258291200
15+
]
16+
},
17+
18+
"gpt3_ref_2048":
19+
{
20+
"Benchmark": "gpt3",
21+
"Creator": "Google & NVIDIA",
22+
"When": "Prior to 3.0 submission",
23+
"Platform": "TPU-v4-2048 / PaxML, 1024 A100-80GB / Megatron-LM",
24+
"BS": 2048,
25+
"Hyperparams": {
26+
"opt_base_learning_rate": 2e-5
27+
},
28+
"Epochs to converge": [
29+
1157627904, 1207959552, 1157627904, 1207959552, 1207959552, 1157627904, 1157627904
30+
]
31+
},
32+
33+
"gpt3_ref_3072":
34+
{
35+
"Benchmark": "gpt3",
36+
"Creator": "Google & NVIDIA",
37+
"When": "Prior to 3.0 submission",
38+
"Platform": "TPU-v4-1536 / PaxML, 1024 A100-80GB / Megatron-LM",
39+
"BS": 3072,
40+
"Hyperparams": {
41+
"opt_base_learning_rate": 2e-5
42+
},
43+
"Epochs to converge": [
44+
1258291200, 1207959552, 1207959552, 1207959552, 1207959552, 1207959552, 13790871552
45+
]
46+
},
47+
48+
"gpt3_ref_4096":
49+
{
50+
"Benchmark": "gpt3",
51+
"Creator": "Google & NVIDIA",
52+
"When": "Prior to 3.0 submission",
53+
"Platform": "TPU-v4-2048 / PaxML, 1024 A100-80GB / Megatron-LM",
54+
"BS": 4096,
55+
"Hyperparams": {
56+
"opt_base_learning_rate": 3e-5
57+
},
58+
"Epochs to converge": [
59+
1258291200, 1258291200, 1308622848, 1258291200, 1258291200, 1258291200
60+
]
61+
},
62+
63+
"gpt3_ref_6144":
64+
{
65+
"Benchmark": "gpt3",
66+
"Creator": "Google & NVIDIA",
67+
"When": "Prior to 3.0 submission",
68+
"Platform": "TPU-v4-2048 / PaxML, 1024 H100-80GB / Megatron-LM",
69+
"BS": 6144,
70+
"Hyperparams": {
71+
"opt_base_learning_rate": 3e-5
72+
},
73+
"Epochs to converge": [
74+
1409286144, 1409286144, 1409286144, 1409286144, 1409286144, 1409286144
75+
]
76+
},
77+
78+
"gpt3_ref_8192":
79+
{
80+
"Benchmark": "gpt3",
81+
"Creator": "Google & NVIDIA",
82+
"When": "Prior to 3.0 submission",
83+
"Platform": "TPU-v4-2048 / PaxML, 1024 A100-80GB / Megatron-LM",
84+
"BS": 8192,
85+
"Hyperparams": {
86+
"opt_base_learning_rate": 3e-5
87+
},
88+
"Epochs to converge": [
89+
1610612736, 1660944384, 1660944384, 1610612736, 1610612736, 1610612736
90+
]
91+
}
92+
93+
}

0 commit comments

Comments
 (0)