@@ -87,6 +87,29 @@ def _find_benchmark(result_file, ruleset):
     return benchmark
 
 
+def _find_samples_to_converge(result_file, ruleset):
+    loglines, _ = parse_file(result_file, ruleset)
+    train_samples = None
+    epoch_num = None
+    samples_count = None
+    for logline in loglines:
+        if logline.key == "train_samples":
+            train_samples = logline.value["value"]
+        if logline.key == "eval_accuracy":
+            if "epoch_num" in logline.value["metadata"]:
+                epoch_num = logline.value["metadata"]["epoch_num"]
+            if "samples_count" in logline.value["metadata"]:
+                samples_count = logline.value["metadata"]["samples_count"]
+    if samples_count is not None:
+        return samples_count
+    if train_samples is not None and epoch_num is not None:
+        return train_samples * epoch_num
+    raise ValueError(
+        "Not enough values specified in result file. One of ('samples_count') "
+        "or ('train_samples' and 'epoch_num') is needed"
+    )
+
+
 args = get_compute_args()
 _reset_scaling(args.benchmark_folder)
 pattern = "{folder}/result_*.txt".format(folder=args.benchmark_folder)
@@ -142,7 +165,10 @@ def _find_benchmark(result_file, ruleset):
 print_benchmark_info(args, benchmark)
 mean_score = 0
 for file, s in scores_track.items():
-    print(f"Score - Time to Train (minutes) for {file}: {s}")
+    samples_to_converge = _find_samples_to_converge(file, args.ruleset)
+    print(
+        f"Score - Time to Train (minutes) for {file}: {s}. Samples to converge: {samples_to_converge}"
+    )
     mean_score += s
 mean_score /= len(result_files)
 mean_score *= scaling_factor
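
For reviewers who want to sanity-check the new helper, below is a minimal sketch of its two return paths. It is not part of the patch: the compute_score module name, the SimpleNamespace loglines, and the result-file/ruleset strings are hypothetical placeholders, and it assumes the helper can be imported without triggering the script's module-level CLI logic. The only behavior taken from the diff is that parse_file returns (loglines, _) where each logline exposes .key and .value.

# Hypothetical review harness, not part of the patch.
from types import SimpleNamespace
from unittest import mock

import compute_score  # placeholder name for the module patched above

def fake_loglines(metadata):
    # Stand-ins shaped like the parser output the helper iterates over.
    return [
        SimpleNamespace(key="train_samples", value={"value": 1000, "metadata": {}}),
        SimpleNamespace(key="eval_accuracy", value={"value": 0.75, "metadata": metadata}),
    ]

# Path 1: eval_accuracy carries samples_count, which is returned directly.
with mock.patch.object(compute_score, "parse_file",
                       return_value=(fake_loglines({"samples_count": 4096}), None)):
    assert compute_score._find_samples_to_converge("result_0.txt", "ruleset") == 4096

# Path 2: no samples_count, so the fallback train_samples * epoch_num
# (1000 * 3 = 3000) is used instead; with neither signal, a ValueError is raised.
with mock.patch.object(compute_score, "parse_file",
                       return_value=(fake_loglines({"epoch_num": 3}), None)):
    assert compute_score._find_samples_to_converge("result_0.txt", "ruleset") == 3000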