1
1
#!/usr/bin/env python3
2
2
3
+ from dataclasses import dataclass
3
4
from datetime import date
5
+ from enum import Enum
4
6
from math import log
7
+ from pathlib import Path
5
8
from pprint import pp
6
9
from sys import exit
10
+ from typing import ClassVar , List
7
11
import argparse
8
12
import json
9
- import msgpack
10
13
import sys
11
14
import urllib .request
12
15
16
+ def eprint (* args , ** kwargs ):
17
+ print (* args , file = sys .stderr , ** kwargs )
18
+
19
+ try :
20
+ import msgpack
21
+ except ImportError as e :
22
+ eprint (e )
23
+ eprint ('Try `pip3 install --user msgpack' )
24
+ sys .exit (1 )
25
+
13
26
report = '''
14
27
{date} Triage Log
15
28
40
53
'''
41
54
42
55
results = {
43
- 'regressed ' : [],
44
- 'improved ' : [],
56
+ 'regression ' : [],
57
+ 'improvement ' : [],
45
58
'mixed' : [],
46
59
}
47
60
48
61
49
- def relative_change (a , b ):
50
- '''Returns `ln(a / b)`
62
+ def get_username ():
63
+ usernames = {'mackendy' : 'ecstaticmorse' }
64
+
65
+ home_dir = Path .home ().name
66
+
67
+ return usernames .get (home_dir ) or home_dir
68
+
69
+
70
+ class Metric (Enum ):
71
+ INSTRUCTIONS = 'instructions:u'
72
+
73
+ def human_readable (self ):
74
+ if self == Metric .INSTRUCTIONS :
75
+ return 'instruction count'
76
+ else :
77
+ raise NotImplementedError
78
+
79
+ def direction (self , change ):
80
+ if self == Metric .INSTRUCTIONS :
81
+ return 'regression' if change > 0 else 'improvement'
82
+ else :
83
+ raise NotImplementedError
84
+
51
85
52
- This is prefereable to percentage change, since order doesn't matter and it
53
- scales equally for positive or negative changes.
86
+ def relative_change ( expected , actual ):
87
+ '''Returns (actual - expected) / expected
54
88
55
- For small changes, `ln(a / b) ≈ (b - a) / b`
89
+ This is the standard definition of relative change.
56
90
'''
57
91
58
- return log (a / b )
92
+ return (actual - expected ) / expected
93
+
94
+
95
+ def log_change (expected , actual ):
96
+ '''Returns `ln(actual / expected)`
97
+
98
+ This is prefereable to percentage change because it scales equally for
99
+ positive or negative changes. This means that the order of the arguments
100
+ only affects the sign of the output
101
+
102
+ For small changes, `log_change(a, b) ≈ relative_change(a, b)`
103
+ '''
104
+
105
+ return log (actual / expected )
106
+
107
+
108
+ @dataclass
109
+ class BenchmarkComparison :
110
+ SIGNIFICANCE_THRESHOLD : ClassVar [float ] = 0.01
111
+
112
+ results : List [float ]
113
+ bench_name : str
114
+ cache_state : str
115
+
116
+ metric : Metric = Metric .INSTRUCTIONS
117
+
118
+ def log_change (self ):
119
+ return log_change (* self .results )
120
+
121
+ def relative_change (self ):
122
+ return relative_change (* self .results )
123
+
124
+ def is_significant (self ):
125
+ return abs (self .log_change ()) > self .__class__ .SIGNIFICANCE_THRESHOLD
126
+
127
+ def is_increase (self ):
128
+ return self .results [1 ] > self .results [0 ]
129
+
130
+ def direction (self ):
131
+ return self .metric .direction (self .log_change ())
132
+
133
+ def summary_line (self , link ):
134
+ magnitude = abs (self .log_change ())
135
+ if magnitude > 0.10 :
136
+ size = 'Very large'
137
+ elif magnitude > 0.05 :
138
+ size = 'Large'
139
+ elif magnitude > 0.01 :
140
+ size = 'Moderate'
141
+ elif magnitude > 0.005 :
142
+ size = 'Small'
143
+ else :
144
+ size = 'Very small'
145
+
146
+ percent = self .relative_change () * 100
147
+ return (
148
+ f'{ size } { self .direction ()} in [{ self .metric .human_readable ()} s]({ link } )'
149
+ f' (up to { percent :.1f} % on `{ self .cache_state } ` builds of `{ self .bench_name } `)'
150
+ )
151
+
152
+
153
+ def get_benchmarks (res ):
154
+ ret = []
155
+ data = [res [key ]['data' ] for key in ['a' , 'b' ]]
156
+ for bench_name in data [0 ].keys () & data [1 ].keys ():
157
+ # Ignore rustdoc benchmarks for now
158
+ if bench_name .endswith ('-doc' ):
159
+ continue
160
+
161
+ benches = [dict (datum [bench_name ]) for datum in data ]
162
+ for cache_state in benches [0 ].keys () & benches [1 ].keys ():
163
+ measurements = [bench [cache_state ] for bench in benches ]
164
+ comparison = BenchmarkComparison (measurements , bench_name ,
165
+ cache_state )
166
+ ret .append (comparison )
167
+
168
+ return ret
59
169
60
170
61
171
def gh_link (pr ):
62
172
return f'https://github.com/rust-lang/rust/issues/{ pr } '
63
173
64
174
65
- def compare_link (start , end , stat = 'instructions:u' ):
66
- return f'https://perf.rust-lang.org/compare.html?start={ start } &end={ end } &stat={ stat } '
175
+ def compare_link (start , end , stat ):
176
+ return f'https://perf.rust-lang.org/compare.html?start={ start } &end={ end } &stat={ stat .value } '
177
+
178
+
179
+ def write_section (res , * changes ):
180
+ pr = res ['b' ]['pr' ]
181
+ start = res ['a' ]['commit' ]
182
+ end = res ['b' ]['commit' ]
183
+
184
+ msg = f'[#{ pr } ]({ gh_link (pr )} )'
185
+
186
+ for change in changes :
187
+ msg += '\n - '
188
+ msg += change .summary_line (compare_link (start , end , change .metric ))
189
+
190
+ return msg
191
+
192
+
193
+ def handle_compare (res ):
194
+ eprint (f"Comparing { res ['b' ]['commit' ]} to { res ['a' ]['commit' ]} " )
195
+
196
+ benchmarks = get_benchmarks (res )
197
+
198
+ lo = min (benchmarks , key = lambda x : x .log_change ())
199
+ hi = max (benchmarks , key = lambda x : x .log_change ())
67
200
201
+ changes = []
202
+ if hi .is_increase ():
203
+ changes .append (hi )
68
204
69
- def change_summary (change , status ):
70
- pr = change ['b' ]['pr' ]
71
- direction = status .capitalize ()
72
- stat = 'instruction counts'
73
- start = change ['a' ]['commit' ]
74
- end = change ['b' ]['commit' ]
205
+ if not lo .is_increase ():
206
+ changes .append (lo )
75
207
76
- return '\n ' .join ([
77
- f'[#{ pr } ]({ gh_link (pr )} )'
78
- f'- { direction } results in [{ stat } ]({ compare_link (start , end )} ).' ])
208
+ changes = [c for c in changes if c .is_significant ()]
209
+
210
+ if len (changes ) == 0 :
211
+ return
212
+
213
+ # Unless all changes are going the same direction, report these results as "mixed"
214
+ if len (set (c .is_increase () for c in changes )) == 1 :
215
+ section = changes [0 ].direction ()
216
+ else :
217
+ section = 'mixed'
218
+
219
+ # Print biggest change first
220
+ changes .sort (reverse = True , key = lambda x : abs (x .log_change ()))
221
+
222
+ results [section ].append (write_section (res , * changes ))
79
223
80
224
81
225
def make_request_payload (start , end ):
@@ -88,6 +232,8 @@ def make_request_payload(start, end):
88
232
89
233
90
234
def make_request (start , end ):
235
+ # FIXME: Add some sort of retry mechanism
236
+
91
237
req = urllib .request .Request ('https://perf.rust-lang.org/perf/get' )
92
238
req .add_header ('Content-Type' , 'application/json' )
93
239
req .data = make_request_payload (start , end )
@@ -96,12 +242,12 @@ def make_request(start, end):
96
242
return data
97
243
98
244
99
- def do_triage (start ):
245
+ def do_triage (start , end ):
100
246
# Get the next commit after `start` by comparing it with itself
101
247
initial_response = make_request (start , start )
102
248
103
- if initial_response [ 'next' ] is None :
104
- print ('Failed to get first commit' , file = sys . stderr )
249
+ if initial_response . get ( 'next' ) is None :
250
+ eprint ('Failed to get first commit' )
105
251
sys .exit (1 )
106
252
107
253
commits = [start , initial_response ['next' ]]
@@ -110,77 +256,49 @@ def do_triage(start):
110
256
try :
111
257
response = make_request (* commits )
112
258
except urllib .error .HTTPError as e :
113
- print ( e , file = sys . stderr )
259
+ eprint ( e )
114
260
break
115
261
116
262
if not response ['is_contiguous' ]:
117
- print ('Reached a commit whose perf run is not yet complete' ,
118
- file = sys .stderr )
263
+ eprint ('Reached a commit whose perf run is not yet complete' )
119
264
break
120
265
121
266
handle_compare (response )
267
+ last_reported = commits [1 ]
122
268
123
- if 'next' not in response :
269
+ if 'next' not in response or commits [ 1 ] == end :
124
270
break
125
271
126
272
commits [0 ], commits [1 ] = commits [1 ], response ['next' ]
127
273
128
- print (report .format (
129
- first_commit = start , last_commit = commits [0 ],
130
- date = date .today ().strftime ("%Y-%m-%d" ),
131
- num_regressions = len (results ['regressed' ]),
132
- num_improvements = len (results ['improved' ]),
133
- num_mixed = len (results ['mixed' ]),
134
- num_rollups = '???' ,
135
- regressions = '\n \n ' .join (results ['regressed' ]),
136
- improvements = '\n \n ' .join (results ['improved' ]),
137
- mixed = '\n \n ' .join (results ['mixed' ]),
138
- username = 'ecstaticmorse'
139
- ))
140
-
141
-
142
- def handle_compare (res ):
143
- print (f"Comparing { res ['a' ]['commit' ]} ..{ res ['b' ]['commit' ]} " , file = sys .stderr )
144
- CHANGE_THRESHOLD = 0.01
145
-
146
- data = [res [key ]['data' ] for key in ['a' , 'b' ]]
147
-
148
- max_regression = 0
149
- max_improvement = 0
150
- for bench_name in data [0 ].keys () & data [1 ].keys ():
151
- # Ignore rustdoc benchmarks for now
152
- if bench_name .endswith ('-doc' ):
153
- continue
154
-
155
- benches = [dict (datum [bench_name ]) for datum in data ]
156
- for cache_state in benches [0 ].keys () & benches [0 ].keys ():
157
- measurements = [bench [cache_state ] for bench in benches ]
158
- rel_change = relative_change (* measurements )
159
- max_regression = min (max_regression , rel_change )
160
- max_improvement = max (max_improvement , rel_change )
161
-
162
- improved = abs (max_improvement ) > CHANGE_THRESHOLD
163
- regressed = abs (max_regression ) > CHANGE_THRESHOLD
164
-
165
- if improved and regressed :
166
- status = 'mixed'
167
- elif improved :
168
- status = 'improved'
169
- elif regressed :
170
- status = 'regressed'
171
- else :
172
- return
173
-
174
- handle_significant_change (res , status )
175
-
274
+ out = report .format (first_commit = start ,
275
+ last_commit = last_reported ,
276
+ date = date .today ().strftime ("%Y-%m-%d" ),
277
+ num_regressions = len (results ['regression' ]),
278
+ num_improvements = len (results ['improvement' ]),
279
+ num_mixed = len (results ['mixed' ]),
280
+ num_rollups = '???' ,
281
+ regressions = '\n \n ' .join (results ['regression' ]),
282
+ improvements = '\n \n ' .join (results ['improvement' ]),
283
+ mixed = '\n \n ' .join (results ['mixed' ]),
284
+ username = get_username ())
176
285
177
- def handle_significant_change (res , status ):
178
- results [status ].append (change_summary (res , status ))
286
+ print (out )
179
287
180
288
181
289
if __name__ == '__main__' and not sys .flags .inspect :
182
- parser = argparse .ArgumentParser (description = 'Generate a weekly triage report' )
183
- parser .add_argument ('first_commit' , help = "the last commit of last week's triage report" )
290
+ parser = argparse .ArgumentParser (
291
+ description = 'Print a weekly triage report to stdout' )
292
+ parser .add_argument (
293
+ 'first_commit' ,
294
+ help =
295
+ 'The parent of the earliest commit that will be included in the report'
296
+ )
297
+ parser .add_argument (
298
+ 'last_commit' ,
299
+ nargs = '?' ,
300
+ default = None ,
301
+ help = 'The latest commit that will be included in the report' )
184
302
args = parser .parse_args ()
185
303
186
- do_triage (start = args .first_commit )
304
+ do_triage (start = args .first_commit , end = args . last_commit )
0 commit comments