Skip to content

Commit 9487cc7

Browse files
committed
Bug 1964128 - Enable monitor-only alerting for performance tests with email notifications.
1 parent d5c8ad4 commit 9487cc7

14 files changed

+348
-5
lines changed

schemas/performance-artifact.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,21 @@
205205
"type": "integer",
206206
"minimum": 1,
207207
"maximum": 255
208+
},
209+
"monitor": {
210+
"description": "Enable non-sheriffed alerting (ignores shouldAlert settings)",
211+
"title": "Monitor",
212+
"type": "boolean"
213+
},
214+
"alertNotifyEmails": {
215+
"type": "array",
216+
"title": "Notify these emails for any alerts produced",
217+
"items": {
218+
"type": "string",
219+
"maxLength": 100
220+
},
221+
"uniqueItems": true,
222+
"maxItems": 8
208223
}
209224
},
210225
"required": ["name", "subtests"],

tests/etl/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,10 @@ def perf_job(perf_push, failure_classifications, generic_reference_data):
2121
return create_generic_job(
2222
"myfunguid", perf_push.repository, perf_push.id, generic_reference_data
2323
)
24+
25+
26+
@pytest.fixture
27+
def perf_job_nonsheriffed(perf_push, failure_classifications, generic_reference_data):
28+
return create_generic_job(
29+
"myfunguid", perf_push.repository, perf_push.id, generic_reference_data, tier=3
30+
)

tests/etl/test_perf_data_load.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44
import operator
55
import time
6+
from unittest import mock
67

78
import pytest
89
from django.core.management import call_command
@@ -234,6 +235,52 @@ def test_default_ingest_workflow(
234235
_verify_datum(suite["name"], subtest["name"], subtest["value"], perf_push.time)
235236

236237

238+
@mock.patch("treeherder.etl.perf.generate_alerts")
239+
def test_monitored_perf_data(
240+
mocked_generate_alerts,
241+
test_repository,
242+
perf_push,
243+
later_perf_push,
244+
perf_job_nonsheriffed,
245+
generic_reference_data,
246+
sample_perf_artifact,
247+
):
248+
for suite in sample_perf_artifact["blob"]["suites"]:
249+
suite["monitor"] = True
250+
perf_datum, submit_datum = _prepare_test_data(sample_perf_artifact)
251+
252+
store_performance_artifact(perf_job_nonsheriffed, submit_datum)
253+
254+
assert DATA_PER_ARTIFACT == PerformanceSignature.objects.all().count()
255+
assert 1 == PerformanceFramework.objects.all().count()
256+
257+
# Ensure that alert generation gets triggered on a tier-3 test with
258+
# monitor set to True
259+
mocked_generate_alerts.apply_async.assert_called()
260+
261+
262+
@mock.patch("treeherder.etl.perf.generate_alerts")
263+
def test_unmonitored_perf_data(
264+
mocked_generate_alerts,
265+
test_repository,
266+
perf_push,
267+
later_perf_push,
268+
perf_job_nonsheriffed,
269+
generic_reference_data,
270+
sample_perf_artifact,
271+
):
272+
perf_datum, submit_datum = _prepare_test_data(sample_perf_artifact)
273+
274+
store_performance_artifact(perf_job_nonsheriffed, submit_datum)
275+
276+
assert DATA_PER_ARTIFACT == PerformanceSignature.objects.all().count()
277+
assert 1 == PerformanceFramework.objects.all().count()
278+
279+
# Ensure that alert generation does not get triggered when monitor is False
280+
# on a tier-3 job
281+
mocked_generate_alerts.apply_async.assert_not_called()
282+
283+
237284
def test_hash_remains_unchanged_for_default_ingestion_workflow(
238285
test_repository, perf_job, sample_perf_artifact
239286
):

tests/perfalert/test_alerts.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import datetime
22
import time
3+
from unittest import mock
34

45
import pytest
56

@@ -341,3 +342,101 @@ def test_alert_change_type_absolute(
341342

342343
assert PerformanceAlert.objects.count() == expected_num_alerts
343344
assert PerformanceAlertSummary.objects.count() == expected_num_alerts
345+
346+
347+
def test_alert_monitor_no_sheriff(
348+
test_repository,
349+
test_issue_tracker,
350+
failure_classifications,
351+
generic_reference_data,
352+
test_perf_signature,
353+
):
354+
# modify the test signature to have it as a monitored signature, but not sheriffed
355+
test_perf_signature.monitor = True
356+
test_perf_signature.should_alert = True
357+
test_perf_signature.save()
358+
359+
base_time = time.time() # generate it based off current time
360+
interval = 60
361+
_generate_performance_data(
362+
test_repository,
363+
test_perf_signature,
364+
base_time,
365+
1,
366+
0.5,
367+
int(interval / 2),
368+
)
369+
_generate_performance_data(
370+
test_repository,
371+
test_perf_signature,
372+
base_time,
373+
int(interval / 2) + 1,
374+
1.0,
375+
int(interval / 2),
376+
)
377+
378+
generate_new_alerts_in_series(test_perf_signature)
379+
380+
assert PerformanceAlert.objects.count() == 1
381+
assert PerformanceAlertSummary.objects.count() == 1
382+
383+
# When monitor is true, then alert should not be sheriffed
384+
# regardless of should_alert settings
385+
assert [alert.sheriffed == False for alert in PerformanceAlert.objects.all()]
386+
387+
388+
@mock.patch("treeherder.perf.alerts.taskcluster")
389+
def test_alert_emails(
390+
mocked_taskcluster,
391+
test_repository,
392+
test_issue_tracker,
393+
failure_classifications,
394+
generic_reference_data,
395+
test_perf_signature,
396+
):
397+
mocked_email_client = mock.MagicMock()
398+
mocked_taskcluster.notify_client_factory.return_value = mocked_email_client
399+
400+
401+
test_perf_signature.alert_notify_emails = emails
402+
test_perf_signature.save()
403+
404+
base_time = time.time() # generate it based off current time
405+
interval = 60
406+
_generate_performance_data(
407+
test_repository,
408+
test_perf_signature,
409+
base_time,
410+
1,
411+
0.5,
412+
int(interval / 2),
413+
)
414+
_generate_performance_data(
415+
test_repository,
416+
test_perf_signature,
417+
base_time,
418+
int(interval / 2) + 1,
419+
1.0,
420+
int(interval / 2),
421+
)
422+
423+
generate_new_alerts_in_series(test_perf_signature)
424+
425+
assert PerformanceAlert.objects.count() == 1
426+
assert PerformanceAlertSummary.objects.count() == 1
427+
428+
# When monitor is False, then the alerts should be sheriffed
429+
assert [alert.sheriffed == True for alert in PerformanceAlert.objects.all()]
430+
431+
# Make sure the email service was called correctly for 2 emails
432+
assert mocked_taskcluster.notify_client_factory.call_count == 1
433+
assert mocked_email_client.email.call_count == 2
434+
435+
# Ensure that each email specified has an email sent to it
436+
for email in emails.split():
437+
assert any(
438+
[
439+
email in call_arg[0][0]["address"]
440+
for call_arg in mocked_email_client.email.call_args_list
441+
]
442+
)

treeherder/etl/perf.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def _suite_should_alert_based_on(
9797
and new_datum_ingested
9898
and job.repository.performance_alerts_enabled
9999
and job.tier_is_sheriffable
100-
)
100+
) or (signature.monitor and job.repository.name not in ("try",))
101101

102102

103103
def _test_should_alert_based_on(
@@ -114,7 +114,7 @@ def _test_should_alert_based_on(
114114
and new_datum_ingested
115115
and job.repository.performance_alerts_enabled
116116
and job.tier_is_sheriffable
117-
)
117+
) or (signature.monitor and job.repository.name not in ("try",))
118118

119119

120120
def _test_should_gather_replicates_based_on(
@@ -205,6 +205,8 @@ def _load_perf_datum(job: Job, perf_datum: dict):
205205
# these properties below can be either True, False, or null
206206
# (None). Null indicates no preference has been set.
207207
"should_alert": suite.get("shouldAlert"),
208+
"monitor": suite.get("monitor"),
209+
"alert_notify_emails": _order_and_concat(suite.get("alertNotifyEmails", [])),
208210
"alert_change_type": PerformanceSignature._get_alert_change_type(
209211
suite.get("alertChangeType")
210212
),
@@ -272,6 +274,8 @@ def _load_perf_datum(job: Job, perf_datum: dict):
272274
# null (None). Null indicates no preference has been
273275
# set.
274276
"should_alert": subtest.get("shouldAlert"),
277+
"monitor": suite.get("monitor"),
278+
"alert_notify_emails": _order_and_concat(suite.get("alertNotifyEmails", [])),
275279
"alert_change_type": PerformanceSignature._get_alert_change_type(
276280
subtest.get("alertChangeType")
277281
),

treeherder/perf/alerts.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,29 @@
99
from django.conf import settings
1010
from django.db import transaction
1111

12+
from treeherder.perf.email import AlertNotificationWriter
1213
from treeherder.perf.models import (
1314
PerformanceAlert,
1415
PerformanceAlertSummary,
1516
PerformanceDatum,
1617
PerformanceSignature,
1718
)
1819
from treeherder.perfalert.perfalert import RevisionDatum, detect_changes
20+
from treeherder.services import taskcluster
1921

2022
logger = logging.getLogger(__name__)
2123

2224

25+
def send_alert_emails(emails, alert, alert_summary):
26+
notify_client = taskcluster.notify_client_factory()
27+
28+
for email in emails:
29+
logger.info(f"Sending alert email to {email}")
30+
notification_writer = AlertNotificationWriter()
31+
email = notification_writer.prepare_new_email(email, alert, alert_summary)
32+
notify_client.email(email)
33+
34+
2335
def geomean(iterable):
2436
# Returns a geomean of a list of values.
2537
a = np.array(iterable)
@@ -138,6 +150,7 @@ def generate_new_alerts_in_series(signature):
138150
framework=signature.framework,
139151
push_id=cur.push_id,
140152
prev_push_id=prev.push_id,
153+
sheriffed=not signature.monitor,
141154
defaults={
142155
"manually_created": False,
143156
"created": datetime.utcfromtimestamp(cur.push_timestamp),
@@ -150,9 +163,10 @@ def generate_new_alerts_in_series(signature):
150163
if t_value == float("inf"):
151164
t_value = 1000
152165

153-
PerformanceAlert.objects.update_or_create(
166+
alert, _ = PerformanceAlert.objects.update_or_create(
154167
summary=summary,
155168
series_signature=signature,
169+
sheriffed=not signature.monitor,
156170
defaults={
157171
"noise_profile": noise_profile,
158172
"is_regression": alert_properties.is_regression,
@@ -163,3 +177,6 @@ def generate_new_alerts_in_series(signature):
163177
"t_value": t_value,
164178
},
165179
)
180+
181+
if signature.alert_notify_emails:
182+
send_alert_emails(signature.alert_notify_emails.split(), alert, summary)

treeherder/perf/email.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,3 +292,47 @@ def _write_content(self, must_mention: list[PerformanceSignature]):
292292
content.include_signatures(must_mention)
293293

294294
self._email.content = str(content)
295+
296+
297+
class AlertNotificationContent:
298+
DESCRIPTION = "Perfherder has detected a performance change on the test **{test}** and produced an alert. See {alert_summary} for more information."
299+
ALERT_SUMMARY_LINK = "https://treeherder.mozilla.org/perfherder/alerts?id={alert_summary_id}"
300+
301+
def __init__(self):
302+
self._raw_content = None
303+
304+
def include_alert_summary(self, test: str, alert_summary_id: str):
305+
self._raw_content = self.DESCRIPTION.format(
306+
test=test,
307+
alert_summary=(
308+
f"[this alert summary]"
309+
f"({self.ALERT_SUMMARY_LINK.format(alert_summary_id=alert_summary_id)})"
310+
),
311+
)
312+
313+
def __str__(self):
314+
if self._raw_content is None:
315+
raise ValueError("No content set")
316+
return self._raw_content
317+
318+
319+
class AlertNotificationWriter(EmailWriter):
320+
def prepare_new_email(
321+
self, email: str, alert: PerformanceAlert, alert_summary: PerformanceAlertSummary
322+
) -> dict:
323+
self._write_address(email)
324+
self._write_subject(alert.series_signature.test)
325+
self._write_content(alert.series_signature.test, str(alert_summary.id))
326+
return self.email
327+
328+
def _write_address(self, email: str):
329+
self._email.address = email
330+
331+
def _write_subject(self, test: str):
332+
self._email.subject = f"Alert: Test {test} detected a performance change"
333+
334+
def _write_content(self, test: str, alert_summary_id: str):
335+
content = AlertNotificationContent()
336+
content.include_alert_summary(test, alert_summary_id)
337+
338+
self._email.content = str(content)

0 commit comments

Comments
 (0)