1
+ import importlib .util
1
2
import logging
2
- from datetime import datetime , timedelta
3
- from json import JSONDecodeError
3
+ import os
4
+ import requests
5
+ import shutil
6
+ import tempfile
7
+ import traceback
8
+ import yaml
9
+ from datetime import datetime , timedelta , timezone
10
+ from json import loads , JSONDecodeError
4
11
from logging import INFO , WARNING
12
+ from time import strptime
13
+ from pathlib import Path
5
14
6
15
from django .conf import settings
7
16
from django .db .models import QuerySet
17
26
BackfillNotificationRecord ,
18
27
BackfillRecord ,
19
28
BackfillReport ,
29
+ PerformanceFramework ,
30
+ PerformanceTelemetrySignature ,
31
+ PerformanceTelemetryAlert ,
32
+ PerformanceTelemetryAlertSummary ,
33
+ Push ,
34
+ Repository ,
20
35
)
21
36
22
37
logger = logging .getLogger (__name__ )
23
38
24
39
CLIENT_ID = settings .PERF_SHERIFF_BOT_CLIENT_ID
25
40
ACCESS_TOKEN = settings .PERF_SHERIFF_BOT_ACCESS_TOKEN
26
41
42
# hg.mozilla.org endpoint mapping Firefox build IDs to revisions/platforms.
BUILDID_MAPPING = "https://hg.mozilla.org/mozilla-central/json-firefoxreleases"
# Template for fetching the log/info of a single revision (format with a node).
REVISION_INFO = "https://hg.mozilla.org/mozilla-central/json-log/%s"

# Telemetry detection is initially restricted to this curated set of Glean
# probes; names use '_' separators (dictionary names with '.' are normalized
# in _get_metric_definitions).
INITIAL_PROBES = (
    "memory_ghost_windows",
    "cycle_collector_time",
    "mouseup_followed_by_click_present_latency",
    "network_tcp_connection",
    "network_tls_handshake",
    "networking_http_channel_page_open_to_first_sent",
    "performance_pageload_fcp",
    "perf_largest_contentful_paint",
)
27
56
28
57
class Sherlock :
29
58
"""
@@ -49,6 +78,7 @@ def __init__(
49
78
50
79
self .supported_platforms = supported_platforms or settings .SUPPORTED_PLATFORMS
51
80
self ._wake_up_time = datetime .now ()
81
+ self ._buildid_mappings = {}
52
82
53
83
def sheriff (self , since : datetime , frameworks : list [str ], repositories : list [str ]):
54
84
logger .info ("Sherlock: Validating settings..." )
@@ -215,3 +245,237 @@ def __get_data_points_to_backfill(context: list[dict]) -> list[dict]:
215
245
start = 1
216
246
217
247
return context [start :]
248
+
249
    def telemetry_alert(self):
        """Run change-point detection over telemetry probes and file alerts.

        Fetches probe definitions from the telemetry dictionaries, runs the
        probe's configured mozdetect change-detection technique over its
        timeseries for each supported platform, and records every detection
        as a telemetry alert against mozilla-central.

        Raises:
            Exception: when GOOGLE_APPLICATION_CREDENTIALS is not set outside
                of the local "backend" environment (BigQuery access needed).
        """
        # Imported lazily so the module can be loaded without mozdetect
        # being installed.
        import mozdetect
        from mozdetect.telemetry_query import get_metric_table

        if (
            not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
            and settings.SITE_HOSTNAME != "backend"
        ):
            raise Exception(
                "GOOGLE_APPLICATION_CREDENTIALS must be defined in production. "
                "Use GCLOUD_DIR for local testing."
            )

        ts_detectors = mozdetect.get_timeseries_detectors()

        metric_definitions = self._get_metric_definitions()

        repository = Repository.objects.get(name="mozilla-central")
        framework = PerformanceFramework.objects.get(name="telemetry")
        for metric_info in metric_definitions:
            # Restrict detection to the curated initial probe list.
            if metric_info["name"] not in INITIAL_PROBES:
                continue
            logger.info(f"Running detection for {metric_info['name']}")
            # A probe may request a specific technique via its "monitor"
            # definition; default to the cdf_squared detector otherwise.
            cdf_ts_detector = ts_detectors[
                metric_info["data"]
                .get("monitor", {})
                .get("change-detection-technique", "cdf_squared")
            ]

            for platform in ("Windows", "Darwin", "Linux"):
                # NOTE(review): "Mobile" never appears in the platform tuple
                # above, so mobile-only probes are currently always skipped —
                # confirm this is intentional until mobile support lands.
                if metric_info["platform"] == "mobile" and platform != "Mobile":
                    continue
                elif metric_info["platform"] == "desktop" and platform == "Mobile":
                    continue
                logger.info(f"On Platform {platform}")
                try:
                    data = get_metric_table(
                        metric_info["name"],
                        platform,
                        android=(platform == "Mobile"),
                        use_fog=True,
                    )
                    if data.empty:
                        logger.info("No data found")
                        continue

                    timeseries = mozdetect.TelemetryTimeSeries(data)

                    ts_detector = cdf_ts_detector(timeseries)
                    detections = ts_detector.detect_changes()

                    for detection in detections:
                        # Only get buildids if there might be a detection
                        if not self._buildid_mappings:
                            self._make_buildid_to_date_mapping()
                        self._create_detection_alert(
                            detection, metric_info, platform, repository, framework
                        )
                except Exception:
                    # Best-effort: a failure on one probe/platform must not
                    # stop detection for the remaining ones.
                    logger.info(f"Failed: {traceback.format_exc()}")
    def _create_detection_alert(
        self,
        detection: Detection,
        probe_info: dict,
        platform: str,
        repository: Repository,
        framework: PerformanceFramework,
    ):
        """Persist a mozdetect Detection as a telemetry alert.

        Resolves the detection's date to actual builds/pushes, reuses an
        alert summary within +/- 1 day of the detection push when one
        exists (otherwise creates one), and upserts the alert itself.

        Args:
            detection: mozdetect detection result; its ``location`` is
                treated as a date string.
            probe_info: metric definition dict from _get_metric_definitions.
            platform: one of "Windows"/"Darwin"/"Linux" (see telemetry_alert).
            repository: repository the alert is filed against.
            framework: the "telemetry" performance framework.
        """
        # Get, or create the signature
        # TODO: Allow multiple channels, legacy probes, and different apps
        probe_signature, _ = PerformanceTelemetrySignature.objects.update_or_create(
            channel="Nightly",
            platform=platform,
            probe=probe_info["name"],
            probe_type="Glean",
            application="Firefox",
        )

        detection_date = str(detection.location)
        if detection_date not in self._buildid_mappings[platform]:
            # TODO: See if we should expand the range in this situation
            detection_date = self._find_closest_build_date(detection_date, platform)

        # Builds are doubly linked by date (see _make_buildid_to_date_mapping).
        detection_build = self._buildid_mappings[platform][detection_date]
        prev_build = self._buildid_mappings[platform][detection_build["prev_build"]]
        next_build = self._buildid_mappings[platform][detection_build["next_build"]]

        # Get the pushes for these builds
        detection_push = Push.objects.get(
            revision=detection_build["node"], repository__name=repository.name
        )
        prev_push = Push.objects.get(revision=prev_build["node"], repository__name=repository.name)
        next_push = Push.objects.get(revision=next_build["node"], repository__name=repository.name)

        # Check that an alert summary doesn't already exist around this point (+/- 1 day)
        latest_timestamp = next_push.time + timedelta(days=1)
        oldest_timestamp = next_push.time - timedelta(days=1)
        try:
            detection_summary = PerformanceTelemetryAlertSummary.objects.filter(
                repository=repository,
                framework=framework,
                push__time__gte=oldest_timestamp,
                push__time__lte=latest_timestamp,
            ).latest("push__time")
        except PerformanceTelemetryAlertSummary.DoesNotExist:
            detection_summary = None

        if not detection_summary:
            # Create an alert summary to capture all alerts
            # that occurred on the same date range
            detection_summary, _ = PerformanceTelemetryAlertSummary.objects.get_or_create(
                repository=repository,
                framework=framework,
                prev_push=prev_push,
                push=next_push,
                original_push=detection_push,
                defaults={
                    "manually_created": False,
                    "created": datetime.now(timezone.utc),
                },
            )

        detection_alert, _ = PerformanceTelemetryAlert.objects.update_or_create(
            summary_id=detection_summary.id,
            series_signature=probe_signature,
            defaults={
                "is_regression": True,
                # NOTE(review): raises ZeroDivisionError when previous_value
                # is 0 (swallowed by the caller's broad except) — confirm
                # whether a 0 baseline should be handled explicitly.
                "amount_pct": round(
                    (100.0 * abs(detection.new_value - detection.previous_value))
                    / float(detection.previous_value),
                    2,
                ),
                "amount_abs": abs(detection.new_value - detection.previous_value),
                "sustained": True,
                "direction": detection.direction,
                "confidence": detection.confidence,
                "prev_value": detection.previous_value,
                "new_value": detection.new_value,
                "prev_median": detection.optional_detection_info["Interpolated Median"][0],
                "new_median": detection.optional_detection_info["Interpolated Median"][1],
                # NOTE(review): the p90 fields are populated from the
                # "Interpolated p05" entry — looks like a p05/p90 mix-up;
                # verify against mozdetect's optional_detection_info keys.
                "prev_p90": detection.optional_detection_info["Interpolated p05"][0],
                "new_p90": detection.optional_detection_info["Interpolated p05"][1],
                "prev_p95": detection.optional_detection_info["Interpolated p95"][0],
                "new_p95": detection.optional_detection_info["Interpolated p95"][1],
            },
        )
+ def _get_metric_definitions (self ) -> list [dict ]:
398
+ metric_definition_urls = [
399
+ ("https://dictionary.telemetry.mozilla.org/data/firefox_desktop/index.json" , "desktop" ),
400
+ ("https://dictionary.telemetry.mozilla.org/data/fenix/index.json" , "mobile" ),
401
+ ]
402
+
403
+ merged_metrics = []
404
+
405
+ for url , platform in metric_definition_urls :
406
+ try :
407
+ logger .info (f"Getting probes from { url } " )
408
+ response = requests .get (url )
409
+ response .raise_for_status ()
410
+
411
+ data = response .json ()
412
+ metrics = data .get ("metrics" , [])
413
+ for metric in metrics :
414
+ merged_metrics .append (
415
+ {
416
+ "name" : metric ["name" ].replace ("." , "_" ),
417
+ "data" : metric ,
418
+ "platform" : platform ,
419
+ }
420
+ )
421
+
422
+ logger .info (f"Found { len (metrics )} probes" )
423
+ except requests .RequestException as e :
424
+ logger .info (f"Failed to fetch from { url } : { e } " )
425
+ except ValueError :
426
+ logger .info (f"Invalid JSON from { url } " )
427
+
428
+ return merged_metrics
429
+
430
+ def _make_buildid_to_date_mapping (self ):
431
+ # Always returned in order of newest to oldest, only capture
432
+ # the newest build for each day, and ignore others. This can
433
+ # differ between platforms too (e.g. failed builds)
434
+ buildid_mappings = self ._get_buildid_mappings ()
435
+
436
+ prev_date = {}
437
+ for build in buildid_mappings ["builds" ]:
438
+ platform = self ._replace_platform_build_name (build ["platform" ])
439
+ if not platform :
440
+ continue
441
+ curr_date = str (datetime .strptime (build ["buildid" ][:8 ], "%Y%m%d" ).date ())
442
+
443
+ platform_builds = self ._buildid_mappings .setdefault (platform , {})
444
+ if curr_date not in platform_builds :
445
+ platform_builds [curr_date ] = build
446
+
447
+ if prev_date .get (platform ):
448
+ platform_builds [prev_date [platform ]]["prev_build" ] = curr_date
449
+ platform_builds [curr_date ]["next_build" ] = prev_date [platform ]
450
+ else :
451
+ platform_builds [curr_date ]["next_build" ] = curr_date
452
+
453
+ prev_date [platform ] = curr_date
454
+
455
+ def _get_buildid_mappings (self ) -> dict :
456
+ try :
457
+ response = requests .get (BUILDID_MAPPING )
458
+ response .raise_for_status ()
459
+ return loads (response .content )
460
+ except requests .RequestException as e :
461
+ raise Exception (f"Failed to download buildid mappings, cannot produce detections: { e } " )
462
+
463
+ def _replace_platform_build_name (self , platform : str ) -> str :
464
+ if platform == "win64" :
465
+ return "Windows"
466
+ if platform == "linux64" :
467
+ return "Linux"
468
+ if platform == "mac" :
469
+ return "Darwin"
470
+ return ""
471
+
472
+ def _find_closest_build_date (self , detection_date : str , platform : str ) -> str :
473
+ # Get the closest date to the detection date
474
+ prev_date = None
475
+
476
+ for date in sorted (list (self ._buildid_mappings [platform ].keys ())):
477
+ if date > detection_date :
478
+ break
479
+ prev_date = date
480
+
481
+ return prev_date
0 commit comments