Merge pull request #424 from mmarcinkiewicz/patch-1

hiwotadese · web-flow · commit ff578240a1dd · 2025-08-07T09:08:16.000-07:00
Create rcps_llama31_405b.json
diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json
@@ -1,27 +1,9 @@
-{
-    "llama31_405b_ref_1008":
-    {
-      "Benchmark": "llama31_405b",
-      "Creator": "NVIDIA",
-      "When": "Reference RCPs before 5.0 submission",
-      "Platform": "288xDGX-H100",
-      "Precision": "BF16",
-      "BS": 1008,
-      "Hyperparams": {
-        "opt_base_learning_rate": 7e-05,
-        "opt_learning_rate_warmup_steps": 9143,
-        "gradient_accumulation_steps": 126
-      },
-      "Epochs to converge": [
-        324576,324576,324576,
-        324576,324576,324576
-      ]
-    },
+{  
     "llama31_405b_ref_1152":
     {
       "Benchmark": "llama31_405b",
       "Creator": "NVIDIA",
-      "When": "Reference RCPs before 5.0 submission",
+      "When": "Reference RCPs after 5.0 submission",
       "Platform": "288xDGX-H100",
       "Precision": "BF16",
       "BS": 1152,
@@ -31,16 +13,16 @@
         "gradient_accumulation_steps": 144
       },
       "Epochs to converge": [
-        322560,322560,322560,
-        322560,322560,322560
+        313344,313344,313344,
+        331776,313344,294912
       ]
     },
   
     "llama31_405b_ref_2304":
     {
         "Benchmark": "llama31_405b",
         "Creator": "NVIDIA",
-        "When": "Reference RCPs before 5.0 submission",
+        "When": "Reference RCPs after 5.0 submission",
         "Platform": "288xDGX-H100",
         "Precision": "BF16",
         "BS": 2304,
@@ -50,15 +32,16 @@
           "gradient_accumulation_steps": 288
         },
         "Epochs to converge": [
-          368640,368640,368640,
-          368640,414720,414720
+          368640,350208,387072,
+          368640,368640,368640
         ]
       },
+    
     "llama31_405b_ref_4608":
     {
         "Benchmark": "llama31_405b",
         "Creator": "NVIDIA",
-        "When": "Reference RCPs before 5.0 submission",
+        "When": "Reference RCPs after 5.0 submission",
         "Platform": "288xDGX-H100",
         "Precision": "BF16",
         "BS": 4608,
@@ -68,45 +51,10 @@
           "gradient_accumulation_steps": 576
         },
         "Epochs to converge": [
-          460800,460800,506880,
-          506880,506880,506880
-        ]
-      },
-    "llama31_405b_ref_6912":
-    {
-        "Benchmark": "llama31_405b",
-        "Creator": "NVIDIA",
-        "When": "Reference RCPs before 5.0 submission",
-        "Platform": "72xDGX-H100",
-        "Precision": "BF16",
-        "BS": 6912,
-        "Hyperparams": {
-          "opt_base_learning_rate": 48e-05,
-          "opt_learning_rate_warmup_steps": 1334,
-          "gradient_accumulation_steps": 3456
-        },
-        "Epochs to converge": [
-          580608,580608,580608,
-          628992,628992,628992
-        ]
-      },
-    "llama31_405b_ref_9216":
-    {
-        "Benchmark": "llama31_405b",
-        "Creator": "NVIDIA",
-        "When": "Reference RCPs before 5.0 submission",
-        "Platform": "288xDGX-H100",
-        "Precision": "BF16",
-        "BS": 9216,
-        "Hyperparams": {
-          "opt_base_learning_rate": 64e-05,
-          "opt_learning_rate_warmup_steps": 1000,
-          "gradient_accumulation_steps": 1152
-        },
-        "Epochs to converge": [
-          645120,645120,691200,
-          691200,737280,737280
+          497664,497664,460800,
+          497664,479232,497664
         ]
       }
   }
-  
+ 
+