Skip to content

Commit b5cb220

Browse files
authored
Merge pull request #376 from mlcommons/training_v4.0
Fix scaling: prune RCPs by mean epochs
2 parents a9f4839 + 39fc7a0 commit b5cb220

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

mlperf_logging/rcp_checker/rcp_checker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def _prune_rcps(self):
232232
# Step 1
233233
# Find the point with the fastest convergence and prune all points with a smaller batch size
234234
# That way the min batch size point will have the fastest convergence
235-
fastest_conv = min(min_epochs, key=lambda rc: rc['Min Epochs'])
235+
fastest_conv = min(min_epochs, key=lambda rc: rc['RCP Mean'])
236236
min_epochs = list(filter(lambda rc: rc['BS'] >= fastest_conv['BS'], min_epochs))
237237

238238
# Step 2
@@ -249,7 +249,7 @@ def _prune_rcps(self):
249249
rcp_max = min_epochs[i+1]
250250
bs = min_epochs[i]['BS']
251251
name, rcp = self._create_interp_rcp(bs, rcp_min, rcp_max)
252-
if min_epochs[i]['Min Epochs'] > rcp['Min Epochs']:
252+
if min_epochs[i]['RCP Mean'] > rcp['RCP Mean']:
253253
del min_epochs[i]
254254
i = i-1
255255
list_len = list_len - 1

0 commit comments

Comments
 (0)