diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_8b.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_8b.json index 646e62c..4e5a331 100644 --- a/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_8b.json +++ b/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_8b.json @@ -26,19 +26,19 @@ "Benchmark": "llama31_8b", "Creator": "NVIDIA", "When": "Reference RCPs before 5.1 submission", - "Platform": "2xDGX-B200", + "Platform": "4xDGX-B200", "Precision": "BF16", "BS": 64, "Hyperparams": { - "opt_base_learning_rate": 1e-03, - "opt_learning_rate_warmup_samples": 16348, - "gradient_accumulation_steps": 4 + "opt_base_learning_rate": 8e-04, + "opt_learning_rate_warmup_samples": 6144, + "gradient_accumulation_steps": 2 }, "Epochs to converge": [ - 233472, 221184, 233472, 221184, 221184, - 245760, 233472, 233472, 208896, 245760, - 233472, 221184, 233472, 233472, 221184, - 245760, 221184, 233472, 233472, 233472 + 233472, 208896, 208896, 233472, 233472, + 233472, 233472, 233472, 208896, 233472, + 233472, 233472, 245760, 221184, 208896, + 233472, 233472, 221184, 221184, 221184 ] }, @@ -88,3 +88,4 @@ +