Skip to content

Commit 4272f73

Browse files
authored
Merge pull request #228 from shangw-nvidia/shangw-nvidia/rnnt_rcps
[RCP] Update rcps for rnnt.
2 parents 2f1f186 + e90963e commit 4272f73

File tree

1 file changed

+64
-11
lines changed

1 file changed

+64
-11
lines changed

mlperf_logging/rcp_checker/training_2.0.0/rcps_rnnt.json

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,84 @@
11
{
22

3+
"rnn_t_ref_0.5k":
4+
{
5+
"Benchmark": "rnnt",
6+
"Creator": "NVIDIA",
7+
"When": "Prior to 2.0 submission",
8+
"Platform": "TBD",
9+
"BS": 512,
10+
"Hyperparams": {
11+
"opt_base_learning_rate": 0.0062,
12+
"opt_lamb_learning_rate_hold_epochs": 11,
13+
"opt_lamb_learning_rate_decay_poly_power": 0.915,
14+
"opt_learning_rate_warmup_epochs": 1,
15+
"opt_weight_decay": 0.001,
16+
"opt_lamb_beta_1": 0.9,
17+
"opt_lamb_beta_2": 0.9985,
18+
"opt_lamb_learning_rate_min": 1e-5,
19+
"opt_gradient_clip_norm": 1.0,
20+
"opt_gradient_accumulation_steps": 2,
21+
"model_eva_ema_factor": 0.994,
22+
"model_weights_initialization_scale": 0.45,
23+
"data_train_num_buckets": 1
24+
},
25+
"Epochs to converge": [
26+
39, 40, 43, 41, 39, 52, 43, 44, 40, 42,
27+
40, 40, 42, 37, 40, 41, 46, 51, 41, 40 ]
28+
},
29+
330
"rnn_t_ref_1k":
431
{
532
"Benchmark": "rnnt",
633
"Creator": "NVIDIA",
7-
"When": "Prior to 1.1 submission",
34+
"When": "Prior to 2.0 submission",
835
"Platform": "TBD",
936
"BS": 1024,
1037
"Hyperparams": {
11-
"opt_base_learning_rate": 0.004,
12-
"opt_lamb_learning_rate_hold_epochs": 40,
13-
"opt_lamb_learning_rate_decay_poly_power": 0.935,
14-
"opt_learning_rate_warmup_epochs": 6,
38+
"opt_base_learning_rate": 0.007447,
39+
"opt_lamb_learning_rate_hold_epochs": 17,
40+
"opt_lamb_learning_rate_decay_poly_power": 0.9037,
41+
"opt_learning_rate_warmup_epochs": 3,
42+
"opt_weight_decay": 0.001,
43+
"opt_lamb_beta_1": 0.9,
44+
"opt_lamb_beta_2": 0.999,
45+
"opt_lamb_learning_rate_min": 1e-5,
46+
"opt_gradient_clip_norm": 1.0,
47+
"opt_gradient_accumulation_steps": 4,
48+
"model_eva_ema_factor": 0.992,
49+
"model_weights_initialization_scale": 0.5,
50+
"data_train_num_buckets": 1
51+
},
52+
"Epochs to converge": [
53+
44, 42, 41, 45, 43, 49, 43, 45, 50, 41,
54+
48, 41, 47, 46, 46, 44, 42, 50, 43, 42 ]
55+
},
56+
57+
"rnn_t_ref_1.5k":
58+
{
59+
"Benchmark": "rnnt",
60+
"Creator": "NVIDIA",
61+
"When": "Prior to 2.0 submission",
62+
"Platform": "TBD",
63+
"BS": 1536,
64+
"Hyperparams": {
65+
"opt_base_learning_rate": 0.0072,
66+
"opt_lamb_learning_rate_hold_epochs": 26,
67+
"opt_lamb_learning_rate_decay_poly_power": 0.92,
68+
"opt_learning_rate_warmup_epochs": 5,
1569
"opt_weight_decay": 0.001,
1670
"opt_lamb_beta_1": 0.9,
1771
"opt_lamb_beta_2": 0.999,
1872
"opt_lamb_learning_rate_min": 1e-5,
19-
"opt_gradient_clip_norm": "none",
20-
"opt_gradient_accumulation_steps": 8,
21-
"model_eva_ema_factor": 0.999,
73+
"opt_gradient_clip_norm": 1.0,
74+
"opt_gradient_accumulation_steps": 4,
75+
"model_eva_ema_factor": 0.995,
2276
"model_weights_initialization_scale": 0.5,
2377
"data_train_num_buckets": 1
2478
},
2579
"Epochs to converge": [
26-
59, 57, 59, 54, 57, 58, 56, 58, 55, 58,
27-
58, 58, 62, 61, 63, 60, 57, 59, 57, 63 ]
80+
46, 49, 47, 49, 49, 50, 46, 51, 48, 50,
81+
52, 48, 47, 47, 51, 52, 47, 50, 50, 49 ]
2882
},
2983

3084
"rnn_t_ref_2k":
@@ -112,4 +166,3 @@
112166
}
113167

114168
}
115-

0 commit comments

Comments
 (0)