Skip to content

Bug 1964128 - Enable monitor-only alerting for performance tests with email notifications. #8670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions schemas/performance-artifact.json
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,21 @@
"type": "integer",
"minimum": 1,
"maximum": 255
},
"monitor": {
"description": "Enable non-sheriffed alerting (ignores shouldAlert settings)",
"title": "Monitor",
"type": "boolean"
},
"alertNotifyEmails": {
"type": "array",
"title": "Notify these emails for any alerts produced",
"items": {
"type": "string",
"maxLength": 100
},
"uniqueItems": true,
"maxItems": 8
}
},
"required": ["name", "subtests"],
Expand Down
7 changes: 7 additions & 0 deletions tests/etl/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,10 @@ def perf_job(perf_push, failure_classifications, generic_reference_data):
return create_generic_job(
"myfunguid", perf_push.repository, perf_push.id, generic_reference_data
)


@pytest.fixture
def perf_job_nonsheriffed(perf_push, failure_classifications, generic_reference_data):
    """Return a generic performance job at tier 3.

    Tier 3 is below the sheriffed-tier threshold, so alerts produced for this
    job are only generated when a signature opts in via ``monitor``.
    """
    return create_generic_job(
        "myfunguid", perf_push.repository, perf_push.id, generic_reference_data, tier=3
    )
47 changes: 47 additions & 0 deletions tests/etl/test_perf_data_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import operator
import time
from unittest import mock

import pytest
from django.core.management import call_command
Expand Down Expand Up @@ -234,6 +235,52 @@ def test_default_ingest_workflow(
_verify_datum(suite["name"], subtest["name"], subtest["value"], perf_push.time)


@mock.patch("treeherder.etl.perf.generate_alerts")
def test_monitored_perf_data(
    mocked_generate_alerts,
    test_repository,
    perf_push,
    later_perf_push,
    perf_job_nonsheriffed,
    generic_reference_data,
    sample_perf_artifact,
):
    """Monitored suites must schedule alert generation even on a tier-3 job."""
    # Flag every suite in the artifact as monitored.
    for monitored_suite in sample_perf_artifact["blob"]["suites"]:
        monitored_suite["monitor"] = True
    _, submit_datum = _prepare_test_data(sample_perf_artifact)

    store_performance_artifact(perf_job_nonsheriffed, submit_datum)

    assert PerformanceSignature.objects.all().count() == DATA_PER_ARTIFACT
    assert PerformanceFramework.objects.all().count() == 1

    # Despite the job being tier 3 (non-sheriffable), alert generation must
    # have been scheduled because monitor is set on the suites.
    mocked_generate_alerts.apply_async.assert_called()


@mock.patch("treeherder.etl.perf.generate_alerts")
def test_unmonitored_perf_data(
    mocked_generate_alerts,
    test_repository,
    perf_push,
    later_perf_push,
    perf_job_nonsheriffed,
    generic_reference_data,
    sample_perf_artifact,
):
    """Without monitor set, a tier-3 job must not schedule alert generation."""
    # The sample artifact is ingested as-is: no suite carries monitor=True.
    _, submit_datum = _prepare_test_data(sample_perf_artifact)

    store_performance_artifact(perf_job_nonsheriffed, submit_datum)

    assert PerformanceSignature.objects.all().count() == DATA_PER_ARTIFACT
    assert PerformanceFramework.objects.all().count() == 1

    # A tier-3 job is not sheriffable, and no suite opted into monitoring,
    # so alert generation must not have been scheduled.
    mocked_generate_alerts.apply_async.assert_not_called()


def test_hash_remains_unchanged_for_default_ingestion_workflow(
test_repository, perf_job, sample_perf_artifact
):
Expand Down
99 changes: 99 additions & 0 deletions tests/perfalert/test_alerts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import time
from unittest import mock

import pytest

Expand Down Expand Up @@ -341,3 +342,101 @@ def test_alert_change_type_absolute(

assert PerformanceAlert.objects.count() == expected_num_alerts
assert PerformanceAlertSummary.objects.count() == expected_num_alerts


def test_alert_monitor_no_sheriff(
    test_repository,
    test_issue_tracker,
    failure_classifications,
    generic_reference_data,
    test_perf_signature,
):
    """An alert from a monitored signature must be created as non-sheriffed,
    even when should_alert is set."""
    # modify the test signature to have it as a monitored signature, but not sheriffed
    test_perf_signature.monitor = True
    test_perf_signature.should_alert = True
    test_perf_signature.save()

    base_time = time.time()  # generate it based off current time
    interval = 60
    _generate_performance_data(
        test_repository,
        test_perf_signature,
        base_time,
        1,
        0.5,
        int(interval / 2),
    )
    _generate_performance_data(
        test_repository,
        test_perf_signature,
        base_time,
        int(interval / 2) + 1,
        1.0,
        int(interval / 2),
    )

    generate_new_alerts_in_series(test_perf_signature)

    assert PerformanceAlert.objects.count() == 1
    assert PerformanceAlertSummary.objects.count() == 1

    # When monitor is true, then alert should not be sheriffed
    # regardless of should_alert settings.
    # NOTE: use all(); asserting a list comprehension is always truthy for a
    # non-empty queryset, so the original form could never fail here.
    assert all(not alert.sheriffed for alert in PerformanceAlert.objects.all())


@mock.patch("treeherder.perf.alerts.taskcluster")
def test_alert_emails(
    mocked_taskcluster,
    test_repository,
    test_issue_tracker,
    failure_classifications,
    generic_reference_data,
    test_perf_signature,
):
    """Each address in alert_notify_emails must receive exactly one email
    when a new alert is generated."""
    mocked_email_client = mock.MagicMock()
    mocked_taskcluster.notify_client_factory.return_value = mocked_email_client

    emails = "[email protected] [email protected]"
    test_perf_signature.alert_notify_emails = emails
    test_perf_signature.save()

    base_time = time.time()  # generate it based off current time
    interval = 60
    _generate_performance_data(
        test_repository,
        test_perf_signature,
        base_time,
        1,
        0.5,
        int(interval / 2),
    )
    _generate_performance_data(
        test_repository,
        test_perf_signature,
        base_time,
        int(interval / 2) + 1,
        1.0,
        int(interval / 2),
    )

    generate_new_alerts_in_series(test_perf_signature)

    assert PerformanceAlert.objects.count() == 1
    assert PerformanceAlertSummary.objects.count() == 1

    # When monitor is False, then the alerts should be sheriffed.
    # NOTE: use all(); asserting a list comprehension is always truthy for a
    # non-empty queryset, so the original form could never fail here.
    assert all(alert.sheriffed for alert in PerformanceAlert.objects.all())

    # Make sure the email service was called correctly for 2 emails
    assert mocked_taskcluster.notify_client_factory.call_count == 1
    assert mocked_email_client.email.call_count == 2

    # Ensure that each email specified has an email sent to it
    # (generator expression: no need to materialize a list for any())
    for email in emails.split():
        assert any(
            email in call_arg[0][0]["address"]
            for call_arg in mocked_email_client.email.call_args_list
        )
8 changes: 6 additions & 2 deletions treeherder/etl/perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def _suite_should_alert_based_on(
and new_datum_ingested
and job.repository.performance_alerts_enabled
and job.tier_is_sheriffable
)
) or (signature.monitor and job.repository.name not in ("try",))


def _test_should_alert_based_on(
Expand All @@ -114,7 +114,7 @@ def _test_should_alert_based_on(
and new_datum_ingested
and job.repository.performance_alerts_enabled
and job.tier_is_sheriffable
)
) or (signature.monitor and job.repository.name not in ("try",))


def _test_should_gather_replicates_based_on(
Expand Down Expand Up @@ -205,6 +205,8 @@ def _load_perf_datum(job: Job, perf_datum: dict):
# these properties below can be either True, False, or null
# (None). Null indicates no preference has been set.
"should_alert": suite.get("shouldAlert"),
"monitor": suite.get("monitor"),
"alert_notify_emails": _order_and_concat(suite.get("alertNotifyEmails", [])),
"alert_change_type": PerformanceSignature._get_alert_change_type(
suite.get("alertChangeType")
),
Expand Down Expand Up @@ -272,6 +274,8 @@ def _load_perf_datum(job: Job, perf_datum: dict):
# null (None). Null indicates no preference has been
# set.
"should_alert": subtest.get("shouldAlert"),
"monitor": suite.get("monitor"),
"alert_notify_emails": _order_and_concat(suite.get("alertNotifyEmails", [])),
"alert_change_type": PerformanceSignature._get_alert_change_type(
subtest.get("alertChangeType")
),
Expand Down
19 changes: 18 additions & 1 deletion treeherder/perf/alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,29 @@
from django.conf import settings
from django.db import transaction

from treeherder.perf.email import AlertNotificationWriter
from treeherder.perf.models import (
PerformanceAlert,
PerformanceAlertSummary,
PerformanceDatum,
PerformanceSignature,
)
from treeherder.perfalert.perfalert import RevisionDatum, detect_changes
from treeherder.services import taskcluster

logger = logging.getLogger(__name__)


def send_alert_emails(emails, alert, alert_summary):
    """Send one notification email per address about a newly created alert.

    :param emails: iterable of recipient address strings
    :param alert: the PerformanceAlert the notification refers to
    :param alert_summary: the PerformanceAlertSummary grouping that alert
    """
    notify_client = taskcluster.notify_client_factory()

    # One writer per recipient; its prepare_new_email() mutates internal state.
    for address in emails:
        # Lazy %-formatting: the message is only built if the record is emitted.
        logger.info("Sending alert email to %s", address)
        notification_writer = AlertNotificationWriter()
        # Don't shadow the loop variable: `address` is the recipient,
        # `email` is the prepared payload handed to the notify service.
        email = notification_writer.prepare_new_email(address, alert, alert_summary)
        notify_client.email(email)


def geomean(iterable):
# Returns a geomean of a list of values.
a = np.array(iterable)
Expand Down Expand Up @@ -138,6 +150,7 @@ def generate_new_alerts_in_series(signature):
framework=signature.framework,
push_id=cur.push_id,
prev_push_id=prev.push_id,
sheriffed=not signature.monitor,
defaults={
"manually_created": False,
"created": datetime.utcfromtimestamp(cur.push_timestamp),
Expand All @@ -150,9 +163,10 @@ def generate_new_alerts_in_series(signature):
if t_value == float("inf"):
t_value = 1000

PerformanceAlert.objects.update_or_create(
alert, _ = PerformanceAlert.objects.update_or_create(
summary=summary,
series_signature=signature,
sheriffed=not signature.monitor,
defaults={
"noise_profile": noise_profile,
"is_regression": alert_properties.is_regression,
Expand All @@ -163,3 +177,6 @@ def generate_new_alerts_in_series(signature):
"t_value": t_value,
},
)

if signature.alert_notify_emails:
send_alert_emails(signature.alert_notify_emails.split(), alert, summary)
44 changes: 44 additions & 0 deletions treeherder/perf/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,47 @@ def _write_content(self, must_mention: list[PerformanceSignature]):
content.include_signatures(must_mention)

self._email.content = str(content)


class AlertNotificationContent:
    """Builds the markdown body of an alert-notification email."""

    DESCRIPTION = "Perfherder has detected a performance change on the test **{test}** and produced an alert. See {alert_summary} for more information."
    ALERT_SUMMARY_LINK = "https://treeherder.mozilla.org/perfherder/alerts?id={alert_summary_id}"

    def __init__(self):
        # Stays None until include_alert_summary() fills in the body.
        self._raw_content = None

    def include_alert_summary(self, test: str, alert_summary_id: str):
        """Render the body for *test*, linking to its alert summary page."""
        summary_url = self.ALERT_SUMMARY_LINK.format(alert_summary_id=alert_summary_id)
        summary_markdown = f"[this alert summary]({summary_url})"
        self._raw_content = self.DESCRIPTION.format(
            test=test,
            alert_summary=summary_markdown,
        )

    def __str__(self):
        if self._raw_content is None:
            raise ValueError("No content set")
        return self._raw_content


class AlertNotificationWriter(EmailWriter):
    """Composes the notification email for a newly generated performance alert."""

    def prepare_new_email(
        self, email: str, alert: PerformanceAlert, alert_summary: PerformanceAlertSummary
    ) -> dict:
        """Fill in address, subject and body; return the assembled email payload."""
        test_name = alert.series_signature.test
        self._write_address(email)
        self._write_subject(test_name)
        self._write_content(test_name, str(alert_summary.id))
        return self.email

    def _write_address(self, email: str):
        # Single recipient per email.
        self._email.address = email

    def _write_subject(self, test: str):
        self._email.subject = f"Alert: Test {test} detected a performance change"

    def _write_content(self, test: str, alert_summary_id: str):
        notification_body = AlertNotificationContent()
        notification_body.include_alert_summary(test, alert_summary_id)
        self._email.content = str(notification_body)
Loading