 from .. import result_summarizer
 from ...rcp_checker import rcp_checker
 from ...compliance_checker.mlp_compliance import usage_choices, rule_choices
+from ...compliance_checker.mlp_parser import parse_file
+from ...benchmark_meta import get_result_file_counts
 import argparse
+import glob
+import json
+import os
 
 
 def get_compute_args():
     parser = argparse.ArgumentParser(
         prog="mlperf_logging.result_summarizer.compute_score",
         description="Compute the score of a single benchmark",
     )
-    parser.add_argument(
-        "--benchmark",
-        type=str,
-        help="Benchmark to compute the score such as rgat, llama31_8b, etc.",
-        required=True,
-    )
     parser.add_argument("--system", type=str, help="System name", default=None)
     parser.add_argument(
         "--has_power", action="store_true", help="Compute power score as well"
@@ -50,15 +49,55 @@ def get_compute_args():
     return parser.parse_args()
 
 
-def print_benchmark_info(args):
+def print_benchmark_info(args, benchmark):
+    print("INFO -------------------------------------------------------")
     print(f"MLPerf {args.usage}")
     print(f"Folder: {args.benchmark_folder}")
     print(f"Version: {args.ruleset}")
     print(f"System: {args.system}")
-    print(f"Benchmark: {args.benchmark}")
+    print(f"Benchmark: {benchmark}")
+    print("-------------------------------------------------------------")
+
+
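+# Delete any scaling.json left over from a previous run so that a stale
+# scaling factor is not picked up by _get_scaling_factor below.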
+def _reset_scaling(results_dir):
+    filepath = results_dir + "/scaling.json"
+    if os.path.exists(filepath):
+        os.remove(filepath)
+
+
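+# Read the scaling factor from scaling.json (expected to be written by the
+# RCP checker when --scale is used); fall back to 1.0 if the file is absent.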
+def _get_scaling_factor(results_dir):
+    scaling_factor = 1.0
+    scaling_file = results_dir + "/scaling.json"
+    if os.path.exists(scaling_file):
+        with open(scaling_file, "r") as f:
+            contents = json.load(f)
+        scaling_factor = contents["scaling_factor"]
+    return scaling_factor
+
+
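+# The benchmark name is no longer a CLI flag; it is read from the
+# "submission_benchmark" key of the result log instead.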
+def _find_benchmark(result_file, ruleset):
+    loglines, _ = parse_file(result_file, ruleset)
+    benchmark = None
+    for logline in loglines:
+        if logline.key == "submission_benchmark":
+            benchmark = logline.value["value"]
+            break
+    if benchmark is None:
+        raise ValueError("Benchmark not specified in result file")
+    return benchmark
 
 
 args = get_compute_args()
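+# Discover every result_*.txt log in the benchmark folder, derive the
+# benchmark name from the first one, and warn when fewer runs than required
+# for an official submission are present.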
+_reset_scaling(args.benchmark_folder)
+pattern = "{folder}/result_*.txt".format(folder=args.benchmark_folder)
+result_files = glob.glob(pattern, recursive=True)
+benchmark = _find_benchmark(result_files[0], args.ruleset)
+required_runs = get_result_file_counts(args.usage)[benchmark]
+if required_runs > len(result_files):
+    print(
+        f"WARNING: Not enough runs found for an official submission."
+        f" Found: {len(result_files)}, required: {required_runs}"
+    )
 
 if args.scale:
     rcp_checker.check_directory(
@@ -73,29 +112,54 @@ def print_benchmark_info(args):
         set_scaling=True,
     )
 
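+# Apply whatever scaling factor the RCP check recorded (1.0 when none).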
+scaling_factor = _get_scaling_factor(args.benchmark_folder)
+
 if args.is_weak_scaling:
     scores, power_scores = result_summarizer._compute_weak_score_standalone(
-        args.benchmark,
+        benchmark,
         args.system,
         args.has_power,
         args.benchmark_folder,
         args.usage,
         args.ruleset,
     )
-    print_benchmark_info(args)
+    print_benchmark_info(args, benchmark)
     print(f"Scores: {scores}")
     if power_scores:
         print(f"Power Scores - Energy (kJ): {power_scores}")
 else:
-    score, power_score = result_summarizer._compute_strong_score_standalone(
-        args.benchmark,
-        args.system,
-        args.has_power,
-        args.benchmark_folder,
-        args.usage,
-        args.ruleset,
+    scores_track, power_scores_track, score, power_score = (
+        result_summarizer._compute_strong_score_standalone(
+            benchmark,
+            args.system,
+            args.has_power,
+            args.benchmark_folder,
+            args.usage,
+            args.ruleset,
+            return_full_scores=True,
+        )
     )
-    print_benchmark_info(args)
-    print(f"Score - Time to Train (minutes): {score}")
+    print_benchmark_info(args, benchmark)
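+    # Report each run's time-to-train and a (scaled) arithmetic mean. When
+    # fewer than the required runs are present, olympic scoring is skipped
+    # and the mean is reported; otherwise the summarizer's score is used.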
+    mean_score = 0
+    for file, s in scores_track.items():
+        print(f"Score - Time to Train (minutes) for {file}: {s}")
+        mean_score += s
+    mean_score /= len(result_files)
+    mean_score *= scaling_factor
+    if required_runs > len(result_files):
+        print("WARNING: Olympic scoring skipped")
+        print(f"Final score - Time to Train (minutes): {mean_score}")
+    else:
+        print(f"Final score - Time to Train (minutes): {score}")
     if power_score:
-        print(f"Power Score - Energy (kJ): {power_score}")
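+        # Same fallback for energy: report the (scaled) mean of the per-run
+        # power scores when olympic scoring cannot be applied.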
+        mean_power = 0
+        for file, ps in power_scores_track.items():
+            print(f"Power Score - Energy (kJ) for {file}: {ps}")
+            mean_power += ps
+        mean_power /= len(result_files)
+        mean_power *= scaling_factor
+        if required_runs > len(result_files):
+            print("WARNING: Olympic scoring skipped")
+            print(f"Final power score - Energy (kJ): {mean_power}")
+        else:
+            print(f"Power Score - Energy (kJ): {power_score}")