Commit d363daf

Merge pull request #459 from mdboom/bisect-by-stats
Allow for bisecting by pystats
2 parents: fc764df + 2959655

File tree (2 files changed, +99 −29 lines):

bench_runner/scripts/bisect.py
bench_runner/scripts/run_benchmarks.py


bench_runner/scripts/bisect.py

Lines changed: 93 additions & 24 deletions
@@ -1,5 +1,6 @@
 import argparse
 import contextlib
+import json
 from pathlib import Path
 import subprocess
 import sys
@@ -12,18 +13,27 @@
 
 from bench_runner import flags as mflags
 from bench_runner import git
-from bench_runner import result
+from bench_runner import result as mresult
 from bench_runner.scripts import run_benchmarks
 from bench_runner.scripts import workflow
 from bench_runner.util import PathLike, format_seconds
 
 
+def format_result(result: float | int, pystats: str | None = None) -> str:
+    if pystats is None:
+        return format_seconds(result)
+    else:
+        return f"{result:,}"
+
+
 def _get_result_commandline(
     benchmark: str,
     good_val: float,
     bad_val: float,
     pgo: bool,
     flags: str,
+    pystats: str | None,
+    invert: bool,
     repo: PathLike,
 ) -> list[str]:
     repo = Path(repo)
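
The new format_result helper picks a presentation per mode: timings go through the existing format_seconds helper, while pystats values are integers rendered with a thousands separator. A quick, self-contained illustration of the counter path (the number is made up; format_seconds itself is not reproduced here):

# Counter formatting as used by format_result's pystats branch; the value is illustrative.
count = 1234567
print(f"{count:,}")  # prints "1,234,567"
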
@@ -37,16 +47,18 @@ def _get_result_commandline(
         str(bad_val),
         str(pgo),
         str(flags),
+        str(pystats),
+        str(invert),
         str(repo.absolute()),
     ]
 
 
-def parse_result(benchmark_json: PathLike) -> float:
+def parse_timing_result(benchmark_json: PathLike) -> float:
     # The name of the benchmark in the JSON file may be different from the one
     # used to select the benchmark. Therefore, just take the mean of all the
     # benchmarks in the JSON file.
-    result.clear_contents_cache()
-    r = result.Result.from_arbitrary_filename(benchmark_json)
+    mresult.clear_contents_cache()
+    r = mresult.Result.from_arbitrary_filename(benchmark_json)
     timing_data = r.get_timing_data()
     return float(np.mean([x.mean() for x in timing_data.values()]))
 
@@ -55,22 +67,34 @@ def get_result(
     benchmark: str,
     pgo: bool = False,
     flags: str = "",
+    pystats: str | None = None,
     cpython: PathLike = Path("cpython"),
     reconfigure: bool = False,
 ) -> float:
     cpython = Path(cpython)
+    python = cpython / "python"
+    parsed_flags = mflags.parse_flags(flags)
 
     if pgo or reconfigure:
         # Jumping around through commits with PGO can leave stale PGO data
         # around, so we need to clean it each time. We also always do it the
         # first time in case the *last* bisect run used pgo.
         subprocess.run(["make", "clean"], cwd=cpython)
 
-    workflow.compile_unix(cpython, mflags.parse_flags(flags), pgo, False, reconfigure)
-    run_benchmarks.run_benchmarks(cpython / "python", benchmark)
-    timing = parse_result(run_benchmarks.BENCHMARK_JSON)
+    workflow.compile_unix(cpython, parsed_flags, pgo, pystats is not None, reconfigure)
 
-    return timing
+    if pystats is None:
+        run_benchmarks.run_benchmarks(python, benchmark)
+        return parse_timing_result(run_benchmarks.BENCHMARK_JSON)
+    else:
+        run_benchmarks.collect_pystats(python, benchmark)
+        summarize_stats_path = cpython / "Tools" / "scripts" / "summarize_stats.py"
+        subprocess.check_output(
+            [str(python), str(summarize_stats_path), "--json-output", "pystats.json"]
+        )
+        with open("pystats.json", "r") as f:
+            contents = json.load(f)
+        return int(contents[pystats])
 
 
 def get_log_file() -> Path:
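
In the new pystats branch, get_result builds CPython with stats enabled, collects stats for the benchmark, has Tools/scripts/summarize_stats.py write its summary as JSON, and then returns a single integer looked up by key. A minimal sketch of that last step, assuming pystats.json is a flat mapping from stat names to numbers (the key name below is a placeholder, not a documented stat):

import json
from pathlib import Path

def read_pystat(json_path: Path, key: str) -> int:
    # Read one counter from a summarize_stats.py --json-output file.
    # Raises KeyError if the requested stat is not present.
    with open(json_path, "r") as f:
        stats = json.load(f)
    return int(stats[key])

# Hypothetical usage; "specialization_hits" is a placeholder key.
# print(f"{read_pystat(Path('pystats.json'), 'specialization_hits'):,}")
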
@@ -103,11 +127,14 @@ def _main(
     bad: str,
     pgo: bool = False,
     flags: str = "",
+    pystats: str | None = None,
+    invert: bool = False,
     repo: PathLike = Path("."),
     cpython: PathLike = Path("cpython"),
 ):
     repo = Path(repo).absolute()
     cpython = Path(cpython).absolute()
+    im = -1 if invert else 1
 
     delete_log()
 
@@ -117,17 +144,21 @@ def _main(
     )
 
     git.checkout(cpython, good)
-    good_timing = get_result(benchmark, pgo, flags, cpython=cpython, reconfigure=True)
-    log(f"KNOWN GOOD ({good[:7]}): {format_seconds(good_timing)}")
+    good_result = get_result(
+        benchmark, pgo, flags, pystats, cpython=cpython, reconfigure=True
+    )
+    log(f"KNOWN GOOD ({good[:7]}): {format_result(good_result, pystats)}")
 
     git.checkout(cpython, bad)
-    bad_timing = get_result(benchmark, pgo, flags, cpython=cpython)
-    log(f"KNOWN BAD ({bad[:7]}): {format_seconds(bad_timing)}")
+    bad_result = get_result(benchmark, pgo, flags, pystats, cpython=cpython)
+    log(f"KNOWN BAD ({bad[:7]}): {format_result(bad_result, pystats)}")
 
-    if good_timing >= bad_timing:
+    if im * good_result >= im * bad_result:
         show_log()
         raise ValueError(
-            f"Good timing ({good_timing}) must be less than bad timing ({bad_timing})."
+            f"Good result ({format_result(good_result, pystats)}) "
+            f"must be {'more' if invert else 'less'} than "
+            f"bad result ({format_result(bad_result, pystats)})."
         )
 
     try:
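
The im multiplier added above is what makes inversion work: multiplying both sides of the comparison by -1 reverses its direction, so the same >= check validates both the normal case (good must be lower than bad) and the inverted case (good must be higher). A tiny illustration with made-up numbers:

# Normal bisect: the good timing must be smaller than the bad timing.
good, bad, im = 1.0, 2.0, 1
assert not (im * good >= im * bad)  # 1.0 >= 2.0 is False, so the pair is accepted

# Inverted bisect (e.g. a counter that regressed downward): good must be larger.
good, bad, im = 120, 80, -1
assert not (im * good >= im * bad)  # -120 >= -80 is False, so the pair is accepted
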
@@ -138,7 +169,14 @@ def _main(
         subprocess.run(
             ["git", "bisect", "run"]
             + _get_result_commandline(
-                benchmark, good_timing, bad_timing, pgo, flags, repo
+                benchmark,
+                good_result,
+                bad_result,
+                pgo,
+                flags,
+                pystats,
+                invert,
+                repo,
             )
         )
     finally:
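
The command line built by _get_result_commandline is handed to git bisect run, which relies on the child's exit status: 0 marks the current revision good, 1-127 (except 125) marks it bad, and 125 asks git to skip the revision. The inner entry point further down follows that contract via sys.exit(0) / sys.exit(1). A generic, standalone predicate in the same spirit (not this repository's script; measure() and the threshold are placeholders):

#!/usr/bin/env python3
# Minimal "git bisect run" predicate: exit 0 => good, 1 => bad, 125 => skip.
import sys

THRESHOLD = 2.0

def measure() -> float:
    # Stand-in for building and benchmarking the current checkout.
    return 1.0

try:
    value = measure()
except Exception:
    sys.exit(125)  # could not build or run this revision; ask git to skip it
sys.exit(0 if value < THRESHOLD else 1)
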
@@ -182,10 +220,29 @@ def main():
         type=str,
         default="",
     )
+    parser.add_argument(
+        "--pystats",
+        type=str,
+        help="Bisect using pystats. Should be the key in the pystats.json file.",
+    )
+    parser.add_argument(
+        "--invert",
+        action="store_true",
+        help="Invert the values, i.e. expect the bad value to be lower than "
+        "the good value.",
+    )
 
     args = parser.parse_args()
 
-    _main(args.benchmark, args.good, args.bad, args.pgo, args.flags)
+    _main(
+        args.benchmark,
+        args.good,
+        args.bad,
+        args.pgo,
+        args.flags,
+        args.pystats,
+        args.invert,
+    )
 
 
 if __name__ == "__main__":
@@ -197,18 +254,30 @@ def main():
     parser.add_argument("bad_val", type=float)
     parser.add_argument("pgo", type=str)
     parser.add_argument("flags", type=str)
+    parser.add_argument("pystats", type=str)
+    parser.add_argument("invert", type=str, choices=["True", "False"])
     parser.add_argument("repo", type=str)
     args = parser.parse_args()
 
+    if args.pystats == "None":
+        args.pystats = None
+
+    invert = args.invert == "True"
+    im = -1 if invert else 1
+
     mid_point = (args.good_val + args.bad_val) / 2.0
 
     repo = Path(args.repo)
     cpython = repo / "cpython"
 
     try:
         with contextlib.chdir(repo):
-            timing = get_result(
-                args.benchmark, args.pgo == "True", args.flags, cpython=cpython
+            result = get_result(
+                args.benchmark,
+                args.pgo == "True",
+                args.flags,
+                args.pystats,
+                cpython=cpython,
             )
     except Exception as e:
         # If there was any exception, display that exception and traceback and
@@ -218,20 +287,20 @@ def main():
 
     # The confidence is 0.0 at the mid-point, 1.0 at the good and bad values,
    # and > 1.0 outside of that.
-    confidence = abs((timing - mid_point) / ((args.bad_val - args.good_val) / 2.0))
+    confidence = abs((result - mid_point) / ((args.bad_val - args.good_val) / 2.0))
 
     with contextlib.chdir(repo):
-        if timing > mid_point:
+        if im * result > im * mid_point:
             log(
                 f"BISECT BAD ({git.get_git_hash(cpython)[:7]}): "
-                f"{format_seconds(timing)} (confidence {confidence:.02f})"
+                f"{format_result(result, args.pystats)} (confidence {confidence:.02f})"
             )
-            print(f"BAD: {timing} vs. ({args.good_val}, {args.bad_val})")
+            print(f"BAD: {result} vs. ({args.good_val}, {args.bad_val})")
             sys.exit(1)
         else:
             log(
                 f"BISECT GOOD ({git.get_git_hash(cpython)[:7]}): "
-                f"{format_seconds(timing)} (confidence {confidence:.02f})"
+                f"{format_result(result, args.pystats)} (confidence {confidence:.02f})"
             )
-            print(f"GOOD: {timing} vs. ({args.good_val}, {args.bad_val})")
+            print(f"GOOD: {result} vs. ({args.good_val}, {args.bad_val})")
             sys.exit(0)
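
The confidence figure logged above is the distance of the new measurement from the mid-point, normalized by half of the good-to-bad gap: 0.0 at the mid-point, 1.0 at either known endpoint, and above 1.0 outside the original range. A worked example with made-up numbers:

good_val, bad_val = 2.0, 3.0
mid_point = (good_val + bad_val) / 2.0  # 2.5
result = 2.75
confidence = abs((result - mid_point) / ((bad_val - good_val) / 2.0))
print(f"{confidence:.02f}")  # 0.50: halfway between the mid-point and the bad value
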

bench_runner/scripts/run_benchmarks.py

Lines changed: 6 additions & 5 deletions
@@ -131,9 +131,9 @@ def run_benchmarks(
 def collect_pystats(
     python: PathLike,
     benchmarks: str,
-    fork: str,
-    ref: str,
-    individual: bool,
+    fork: str | None = None,
+    ref: str | None = None,
+    individual: bool = True,
     flags: Iterable[str] | None = None,
 ) -> None:
     pystats_dir = Path("/tmp/py_stats")
@@ -167,7 +167,7 @@ def collect_pystats(
         except NoBenchmarkError:
             pass
         else:
-            if individual:
+            if individual and fork is not None and ref is not None:
                 run_summarize_stats(python, fork, ref, benchmark, flags=flags)
 
         for filename in pystats_dir.iterdir():
@@ -181,7 +181,8 @@ def collect_pystats(
     else:
         benchmark_links = []
 
-    run_summarize_stats(python, fork, ref, "all", benchmark_links, flags=flags)
+    if fork is not None and ref is not None:
+        run_summarize_stats(python, fork, ref, "all", benchmark_links, flags=flags)
 
 
 def get_perf_lines(files: Iterable[PathLike]) -> Iterable[str]:
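
Making fork, ref, and individual optional is what lets bisect.py call collect_pystats(python, benchmark) with no run metadata; when fork or ref is None, both the per-benchmark and the aggregate run_summarize_stats calls are skipped. A small sketch of that keyword-default plus None-guard pattern (the names here are illustrative, not the real API):

def collect(python: str, benchmarks: str,
            fork: str | None = None, ref: str | None = None,
            individual: bool = True) -> None:
    ...  # collect stats for `benchmarks` using `python`
    if individual and fork is not None and ref is not None:
        ...  # per-benchmark summarize step, skipped in the bisect case

collect("cpython/python", "nbody")  # bisect.py-style call: no fork/ref needed
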
