
Commit 6b5151a

Merge remote-tracking branch 'mlcommons/master'

2 parents: f705313 + bfe3268

File tree

5 files changed: +409 -98 lines


VERSION

Lines changed: 1 addition & 1 deletion
-4.1.27
+4.1.29
Lines changed: 30 additions & 0 deletions
# MLPerf compute standalone score

Standalone tool to compute the score of a single MLPerf benchmark.

## Usage

To compute the score of a single benchmark, run the command below. All result files are assumed to be in the same folder:

```sh
python3 -m mlperf_logging.result_summarizer.compute_score --benchmark BENCHMARK \
    --system SYSTEM_NAME --benchmark_folder BENCHMARK_FOLDER --usage USAGE --ruleset RULESET \
    [--is_weak_scaling] [--scale] [--has_power]
```

**BENCHMARK:** Name of the benchmark to score, e.g. rgat, llama31_8b.
**SYSTEM_NAME:** Optional system name.
**BENCHMARK_FOLDER:** Folder containing all the result files of the benchmark.
**USAGE:** Either "training" or "hpc".
**RULESET:** Version of the rules that applies; one of "1.0.0", "1.1.0", "2.0.0", "2.1.0", "3.0.0", "3.1.0", "4.0.0", "4.1.0", "5.0.0", "5.1.0".
**[--is_weak_scaling]:** Treat the benchmark as weak scaling (only applies to HPC).
**[--scale]:** Compute the scaling.json file (only if the folder does not already contain it).
**[--has_power]:** The results include power measurements.
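For example, a hypothetical invocation scoring an rgat training submission whose result files sit in `./results/rgat` (the folder, system name, and ruleset version below are placeholders) could look like:

```sh
# Hypothetical values; substitute your own benchmark, folder, and ruleset
python3 -m mlperf_logging.result_summarizer.compute_score --benchmark rgat \
    --system example_system --benchmark_folder ./results/rgat --usage training --ruleset 5.1.0
```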
## Tested software versions

Tested and confirmed working using the following software versions:

Python 3.9.18

mlperf_logging/result_summarizer/compute_score/__init__.py

Whitespace-only changes.
Lines changed: 188 additions & 0 deletions
```python
from .. import result_summarizer
from ...rcp_checker import rcp_checker
from ...compliance_checker.mlp_compliance import usage_choices, rule_choices
from ...compliance_checker.mlp_parser import parse_file
from ...benchmark_meta import get_result_file_counts
import argparse
import glob
import json
import os


def get_compute_args():
    parser = argparse.ArgumentParser(
        prog="mlperf_logging.result_summarizer.compute_score",
        description="Compute the score of a single benchmark",
    )
    parser.add_argument("--system", type=str, help="System name", default=None)
    parser.add_argument(
        "--has_power", action="store_true", help="Compute power score as well"
    )
    parser.add_argument(
        "--benchmark_folder",
        type=str,
        help="Folder containing all the result files",
        required=True,
    )
    parser.add_argument(
        "--usage",
        type=str,
        default="training",
        choices=usage_choices(),
        help="the usage such as training, hpc, inference_edge, inference_server",
        required=True,
    )
    parser.add_argument(
        "--ruleset",
        type=str,
        choices=rule_choices(),
        help="the ruleset such as 0.6.0, 0.7.0, or 1.0.0",
        required=True,
    )
    parser.add_argument(
        "--is_weak_scaling", action="store_true", help="Compute weak scaling score"
    )
    parser.add_argument(
        "--scale", action="store_true", help="Compute the scaling factor"
    )

    return parser.parse_args()


def print_benchmark_info(args, benchmark):
    print("INFO -------------------------------------------------------")
    print(f"MLPerf {args.usage}")
    print(f"Folder: {args.benchmark_folder}")
    print(f"Version: {args.ruleset}")
    print(f"System: {args.system}")
    print(f"Benchmark: {benchmark}")
    print("-------------------------------------------------------------")


def _reset_scaling(results_dir):
    # Remove any pre-existing scaling.json so it can be regenerated by --scale.
    filepath = results_dir + "/scaling.json"
    if os.path.exists(filepath):
        os.remove(filepath)


def _get_scaling_factor(results_dir):
    # Default to 1.0 when the folder has no scaling.json.
    scaling_factor = 1.0
    scaling_file = results_dir + "/scaling.json"
    if os.path.exists(scaling_file):
        with open(scaling_file, "r") as f:
            contents = json.load(f)
            scaling_factor = contents["scaling_factor"]
    return scaling_factor


def _find_benchmark(result_file, ruleset):
    # Read the submission_benchmark key from a result file's log lines.
    loglines, _ = parse_file(result_file, ruleset)
    benchmark = None
    for logline in loglines:
        if logline.key == "submission_benchmark":
            benchmark = logline.value["value"]
            break
    if benchmark is None:
        raise ValueError("Benchmark not specified in result file")
    return benchmark


def _epochs_samples_to_converge(result_file, ruleset):
    # Prefer samples_count over epoch_num from the last eval_accuracy entry.
    loglines, _ = parse_file(result_file, ruleset)
    epoch_num = None
    samples_count = None
    for logline in loglines:
        if logline.key == "eval_accuracy":
            if "epoch_num" in logline.value["metadata"]:
                epoch_num = logline.value["metadata"]["epoch_num"]
            if "samples_count" in logline.value["metadata"]:
                samples_count = logline.value["metadata"]["samples_count"]
    if samples_count is not None:
        return samples_count
    if epoch_num is not None:
        return epoch_num
    raise ValueError(
        "Not enough values specified in result file. One of ('samples_count') "
        "or ('epoch_num') is needed"
    )


# Main flow: parse arguments, collect the result files, and compute the score.
args = get_compute_args()
_reset_scaling(args.benchmark_folder)
pattern = "{folder}/result_*.txt".format(folder=args.benchmark_folder)
result_files = glob.glob(pattern, recursive=True)
benchmark = _find_benchmark(result_files[0], args.ruleset)
required_runs = get_result_file_counts(args.usage)[benchmark]
if required_runs > len(result_files):
    print(
        f"WARNING: Not enough runs found for an official submission."
        f" Found: {len(result_files)}, required: {required_runs}"
    )

# With --scale, run the RCP checker so it writes scaling.json for the folder.
if args.scale:
    rcp_checker.check_directory(
        args.benchmark_folder,
        args.usage,
        args.ruleset,
        False,
        False,
        rcp_file=None,
        rcp_pass="pruned_rcps",
        rcp_bypass=False,
        set_scaling=True,
    )

scaling_factor = _get_scaling_factor(args.benchmark_folder)

if args.is_weak_scaling:
    # Weak-scaling score (HPC only).
    scores, power_scores = result_summarizer._compute_weak_score_standalone(
        benchmark,
        args.system,
        args.has_power,
        args.benchmark_folder,
        args.usage,
        args.ruleset,
    )
    print_benchmark_info(args, benchmark)
    print(f"Scores: {scores}")
    if power_scores:
        print(f"Power Scores - Energy (kJ): {power_scores}")
else:
    # Strong-scaling score: time to train, per run and aggregated.
    scores_track, power_scores_track, score, power_score = (
        result_summarizer._compute_strong_score_standalone(
            benchmark,
            args.system,
            args.has_power,
            args.benchmark_folder,
            args.usage,
            args.ruleset,
            return_full_scores=True,
        )
    )
    print_benchmark_info(args, benchmark)
    mean_score = 0
    for file, s in scores_track.items():
        epochs_samples_to_converge = _epochs_samples_to_converge(file, args.ruleset)
        print(
            f"Score - Time to Train (minutes) for {file}: {s}. Samples/Epochs to converge: {epochs_samples_to_converge}"
        )
        mean_score += s
    mean_score /= len(result_files)
    mean_score *= scaling_factor
    if required_runs > len(result_files):
        print("WARNING: Olympic scoring skipped")
        print(f"Final score - Time to Train (minutes): {mean_score}")
    else:
        print(f"Final score - Time to Train (minutes): {score}")
    if power_score:
        mean_power = 0
        for file, ps in power_scores_track.items():
            print(f"Power Score - Energy (kJ) for {file}: {ps}")
            mean_power += ps
        mean_power /= len(result_files)
        mean_power *= scaling_factor
        if required_runs > len(result_files):
            print("WARNING: Olympic scoring skipped")
            print(f"Final Power Score - Energy (kJ): {mean_power}")
        else:
            print(f"Power Score - Energy (kJ): {power_score}")
```
