Skip to content

Commit 6c66c38

Browse files
Merge pull request #764 from ecstatic-morse/triage-report-script
Improve triage report script and add a test
2 parents 7c3e205 + 6124cdc commit 6c66c38

File tree

3 files changed

+223
-80
lines changed

3 files changed

+223
-80
lines changed

.github/workflows/ci.yml

+3
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ jobs:
107107
- name: Build collector
108108
run: cargo build -p collector
109109

110+
- name: Test automated triage
111+
run: sh -x -c "ci/check-triage-script.sh"
112+
110113
- name: Check benchmarks
111114
run: sh -x -c "ci/check-profiling.sh"
112115

ci/check-triage-script.sh

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
2+
3+
set -ex;
4+
5+
pip3 install --user msgpack
6+
7+
report=$(mktemp)
8+
first_commit=4e8a8b49ae57233bc196f3529f5184bc208c3034
9+
last_commit=f68e08933d8f519a9655934fedebbc509661b219
10+
11+
triage/weekly_report.py >"$report" $first_commit $last_commit
12+
13+
function grep_report {
14+
grep "$report" >/dev/null -e "$1"
15+
}
16+
17+
grep_report '^1 Regressions, 2 Improvements'
18+
grep_report 'regression .*end=8e9d5db8392c44a2e94008168fa3506ecddaa357'
19+
grep_report 'improvement .*end=b3aae050cd7e0c9a9eb6085bd49b02f67dc1396f'
20+
grep_report 'improvement .*end=f68e08933d8f519a9655934fedebbc509661b219'
21+
22+
rm -f "$report"

triage/weekly_report.py

+198-80
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,28 @@
11
#!/usr/bin/env python3
22

3+
from dataclasses import dataclass
34
from datetime import date
5+
from enum import Enum
46
from math import log
7+
from pathlib import Path
58
from pprint import pp
69
from sys import exit
10+
from typing import ClassVar, List
711
import argparse
812
import json
9-
import msgpack
1013
import sys
1114
import urllib.request
1215

16+
def eprint(*args, **kwargs):
17+
print(*args, file=sys.stderr, **kwargs)
18+
19+
try:
20+
import msgpack
21+
except ImportError as e:
22+
eprint(e)
23+
eprint('Try `pip3 install --user msgpack')
24+
sys.exit(1)
25+
1326
report = '''
1427
{date} Triage Log
1528
@@ -40,42 +53,173 @@
4053
'''
4154

4255
results = {
43-
'regressed': [],
44-
'improved': [],
56+
'regression': [],
57+
'improvement': [],
4558
'mixed': [],
4659
}
4760

4861

49-
def relative_change(a, b):
50-
'''Returns `ln(a / b)`
62+
def get_username():
63+
usernames = {'mackendy': 'ecstaticmorse'}
64+
65+
home_dir = Path.home().name
66+
67+
return usernames.get(home_dir) or home_dir
68+
69+
70+
class Metric(Enum):
71+
INSTRUCTIONS = 'instructions:u'
72+
73+
def human_readable(self):
74+
if self == Metric.INSTRUCTIONS:
75+
return 'instruction count'
76+
else:
77+
raise NotImplementedError
78+
79+
def direction(self, change):
80+
if self == Metric.INSTRUCTIONS:
81+
return 'regression' if change > 0 else 'improvement'
82+
else:
83+
raise NotImplementedError
84+
5185

52-
This is prefereable to percentage change, since order doesn't matter and it
53-
scales equally for positive or negative changes.
86+
def relative_change(expected, actual):
87+
'''Returns (actual - expected) / expected
5488
55-
For small changes, `ln(a / b) ≈ (b - a) / b`
89+
This is the standard definition of relative change.
5690
'''
5791

58-
return log(a / b)
92+
return (actual - expected) / expected
93+
94+
95+
def log_change(expected, actual):
96+
'''Returns `ln(actual / expected)`
97+
98+
This is prefereable to percentage change because it scales equally for
99+
positive or negative changes. This means that the order of the arguments
100+
only affects the sign of the output
101+
102+
For small changes, `log_change(a, b) ≈ relative_change(a, b)`
103+
'''
104+
105+
return log(actual / expected)
106+
107+
108+
@dataclass
109+
class BenchmarkComparison:
110+
SIGNIFICANCE_THRESHOLD: ClassVar[float] = 0.01
111+
112+
results: List[float]
113+
bench_name: str
114+
cache_state: str
115+
116+
metric: Metric = Metric.INSTRUCTIONS
117+
118+
def log_change(self):
119+
return log_change(*self.results)
120+
121+
def relative_change(self):
122+
return relative_change(*self.results)
123+
124+
def is_significant(self):
125+
return abs(self.log_change()) > self.__class__.SIGNIFICANCE_THRESHOLD
126+
127+
def is_increase(self):
128+
return self.results[1] > self.results[0]
129+
130+
def direction(self):
131+
return self.metric.direction(self.log_change())
132+
133+
def summary_line(self, link):
134+
magnitude = abs(self.log_change())
135+
if magnitude > 0.10:
136+
size = 'Very large'
137+
elif magnitude > 0.05:
138+
size = 'Large'
139+
elif magnitude > 0.01:
140+
size = 'Moderate'
141+
elif magnitude > 0.005:
142+
size = 'Small'
143+
else:
144+
size = 'Very small'
145+
146+
percent = self.relative_change() * 100
147+
return (
148+
f'{size} {self.direction()} in [{self.metric.human_readable()}s]({link})'
149+
f' (up to {percent:.1f}% on `{self.cache_state}` builds of `{self.bench_name}`)'
150+
)
151+
152+
153+
def get_benchmarks(res):
154+
ret = []
155+
data = [res[key]['data'] for key in ['a', 'b']]
156+
for bench_name in data[0].keys() & data[1].keys():
157+
# Ignore rustdoc benchmarks for now
158+
if bench_name.endswith('-doc'):
159+
continue
160+
161+
benches = [dict(datum[bench_name]) for datum in data]
162+
for cache_state in benches[0].keys() & benches[1].keys():
163+
measurements = [bench[cache_state] for bench in benches]
164+
comparison = BenchmarkComparison(measurements, bench_name,
165+
cache_state)
166+
ret.append(comparison)
167+
168+
return ret
59169

60170

61171
def gh_link(pr):
62172
return f'https://github.com/rust-lang/rust/issues/{pr}'
63173

64174

65-
def compare_link(start, end, stat='instructions:u'):
66-
return f'https://perf.rust-lang.org/compare.html?start={start}&end={end}&stat={stat}'
175+
def compare_link(start, end, stat):
176+
return f'https://perf.rust-lang.org/compare.html?start={start}&end={end}&stat={stat.value}'
177+
178+
179+
def write_section(res, *changes):
180+
pr = res['b']['pr']
181+
start = res['a']['commit']
182+
end = res['b']['commit']
183+
184+
msg = f'[#{pr}]({gh_link(pr)})'
185+
186+
for change in changes:
187+
msg += '\n- '
188+
msg += change.summary_line(compare_link(start, end, change.metric))
189+
190+
return msg
191+
192+
193+
def handle_compare(res):
194+
eprint(f"Comparing {res['b']['commit']} to {res['a']['commit']}")
195+
196+
benchmarks = get_benchmarks(res)
197+
198+
lo = min(benchmarks, key=lambda x: x.log_change())
199+
hi = max(benchmarks, key=lambda x: x.log_change())
67200

201+
changes = []
202+
if hi.is_increase():
203+
changes.append(hi)
68204

69-
def change_summary(change, status):
70-
pr = change['b']['pr']
71-
direction = status.capitalize()
72-
stat = 'instruction counts'
73-
start = change['a']['commit']
74-
end = change['b']['commit']
205+
if not lo.is_increase():
206+
changes.append(lo)
75207

76-
return '\n'.join([
77-
f'[#{pr}]({gh_link(pr)})'
78-
f'- {direction} results in [{stat}]({compare_link(start, end)}).'])
208+
changes = [c for c in changes if c.is_significant()]
209+
210+
if len(changes) == 0:
211+
return
212+
213+
# Unless all changes are going the same direction, report these results as "mixed"
214+
if len(set(c.is_increase() for c in changes)) == 1:
215+
section = changes[0].direction()
216+
else:
217+
section = 'mixed'
218+
219+
# Print biggest change first
220+
changes.sort(reverse=True, key=lambda x: abs(x.log_change()))
221+
222+
results[section].append(write_section(res, *changes))
79223

80224

81225
def make_request_payload(start, end):
@@ -88,6 +232,8 @@ def make_request_payload(start, end):
88232

89233

90234
def make_request(start, end):
235+
# FIXME: Add some sort of retry mechanism
236+
91237
req = urllib.request.Request('https://perf.rust-lang.org/perf/get')
92238
req.add_header('Content-Type', 'application/json')
93239
req.data = make_request_payload(start, end)
@@ -96,12 +242,12 @@ def make_request(start, end):
96242
return data
97243

98244

99-
def do_triage(start):
245+
def do_triage(start, end):
100246
# Get the next commit after `start` by comparing it with itself
101247
initial_response = make_request(start, start)
102248

103-
if initial_response['next'] is None:
104-
print('Failed to get first commit', file=sys.stderr)
249+
if initial_response.get('next') is None:
250+
eprint('Failed to get first commit')
105251
sys.exit(1)
106252

107253
commits = [start, initial_response['next']]
@@ -110,77 +256,49 @@ def do_triage(start):
110256
try:
111257
response = make_request(*commits)
112258
except urllib.error.HTTPError as e:
113-
print(e, file=sys.stderr)
259+
eprint(e)
114260
break
115261

116262
if not response['is_contiguous']:
117-
print('Reached a commit whose perf run is not yet complete',
118-
file=sys.stderr)
263+
eprint('Reached a commit whose perf run is not yet complete')
119264
break
120265

121266
handle_compare(response)
267+
last_reported = commits[1]
122268

123-
if 'next' not in response:
269+
if 'next' not in response or commits[1] == end:
124270
break
125271

126272
commits[0], commits[1] = commits[1], response['next']
127273

128-
print(report.format(
129-
first_commit=start, last_commit=commits[0],
130-
date=date.today().strftime("%Y-%m-%d"),
131-
num_regressions=len(results['regressed']),
132-
num_improvements=len(results['improved']),
133-
num_mixed=len(results['mixed']),
134-
num_rollups='???',
135-
regressions='\n\n'.join(results['regressed']),
136-
improvements='\n\n'.join(results['improved']),
137-
mixed='\n\n'.join(results['mixed']),
138-
username='ecstaticmorse'
139-
))
140-
141-
142-
def handle_compare(res):
143-
print(f"Comparing {res['a']['commit']}..{res['b']['commit']}", file=sys.stderr)
144-
CHANGE_THRESHOLD = 0.01
145-
146-
data = [res[key]['data'] for key in ['a', 'b']]
147-
148-
max_regression = 0
149-
max_improvement = 0
150-
for bench_name in data[0].keys() & data[1].keys():
151-
# Ignore rustdoc benchmarks for now
152-
if bench_name.endswith('-doc'):
153-
continue
154-
155-
benches = [dict(datum[bench_name]) for datum in data]
156-
for cache_state in benches[0].keys() & benches[0].keys():
157-
measurements = [bench[cache_state] for bench in benches]
158-
rel_change = relative_change(*measurements)
159-
max_regression = min(max_regression, rel_change)
160-
max_improvement = max(max_improvement, rel_change)
161-
162-
improved = abs(max_improvement) > CHANGE_THRESHOLD
163-
regressed = abs(max_regression) > CHANGE_THRESHOLD
164-
165-
if improved and regressed:
166-
status = 'mixed'
167-
elif improved:
168-
status = 'improved'
169-
elif regressed:
170-
status = 'regressed'
171-
else:
172-
return
173-
174-
handle_significant_change(res, status)
175-
274+
out = report.format(first_commit=start,
275+
last_commit=last_reported,
276+
date=date.today().strftime("%Y-%m-%d"),
277+
num_regressions=len(results['regression']),
278+
num_improvements=len(results['improvement']),
279+
num_mixed=len(results['mixed']),
280+
num_rollups='???',
281+
regressions='\n\n'.join(results['regression']),
282+
improvements='\n\n'.join(results['improvement']),
283+
mixed='\n\n'.join(results['mixed']),
284+
username=get_username())
176285

177-
def handle_significant_change(res, status):
178-
results[status].append(change_summary(res, status))
286+
print(out)
179287

180288

181289
if __name__ == '__main__' and not sys.flags.inspect:
182-
parser = argparse.ArgumentParser(description='Generate a weekly triage report')
183-
parser.add_argument('first_commit', help="the last commit of last week's triage report")
290+
parser = argparse.ArgumentParser(
291+
description='Print a weekly triage report to stdout')
292+
parser.add_argument(
293+
'first_commit',
294+
help=
295+
'The parent of the earliest commit that will be included in the report'
296+
)
297+
parser.add_argument(
298+
'last_commit',
299+
nargs='?',
300+
default=None,
301+
help='The latest commit that will be included in the report')
184302
args = parser.parse_args()
185303

186-
do_triage(start=args.first_commit)
304+
do_triage(start=args.first_commit, end=args.last_commit)

0 commit comments

Comments
 (0)