|
5 | 5 | "Benchmark": "llama31_8b", |
6 | 6 | "Creator": "NVIDIA", |
7 | 7 | "When": "Reference RCPs before 5.1 submission", |
8 | | - "Platform": "2xDGX-B200", |
| 8 | + "Platform": "4xDGX-B200", |
9 | 9 | "Precision": "BF16", |
10 | 10 | "BS": 32, |
11 | 11 | "Hyperparams": { |
12 | | - "opt_base_learning_rate": 1e-03, |
13 | | - "opt_learning_rate_warmup_samples": 16348, |
14 | | - "gradient_accumulation_steps": 2 |
| 12 | + "opt_base_learning_rate": 8e-04, |
| 13 | + "opt_learning_rate_warmup_samples": 4096, |
| 14 | + "gradient_accumulation_steps": 1 |
15 | 15 | }, |
16 | 16 | "Epochs to converge": [ |
17 | | - 196608, 221184, 208896, 221184, 221184, |
18 | | - 208896, 208896, 196608, 233472, 208896, |
19 | | - 233472, 208896, 208896, 221184, 233472, |
20 | | - 196608, 208896, 233472, 221184, 208896 |
| 17 | + 196608, 172032, 184320, 184320, 172032, |
| 18 | + 172032, 184320, 184320, 184320, 172032, |
| 19 | + 172032, 172032, 184320, 184320, 184320, |
| 20 | + 172032, 172032, 172032, 184320, 184320 |
21 | 21 | ] |
22 | 22 | }, |
23 | 23 |
|
|
86 | 86 | } |
87 | 87 |
|
88 | 88 |
|
| 89 | + |
| 90 | + |
0 commit comments