diff --git a/go.mod b/go.mod index 879b1b32..3186ca58 100644 --- a/go.mod +++ b/go.mod @@ -22,12 +22,12 @@ require ( github.com/prometheus/client_golang v1.22.0 github.com/prometheus/common v0.63.0 github.com/spf13/cobra v1.8.1 - github.com/stretchr/testify v1.10.0 go.uber.org/mock v0.4.0 go.uber.org/zap v1.27.0 gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.31.3 k8s.io/apimachinery v0.31.3 + k8s.io/client-go v0.31.3 sigs.k8s.io/controller-runtime v0.19.0 ) @@ -123,7 +123,6 @@ require ( github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/sagikazarmark/locafero v0.7.0 // indirect @@ -155,7 +154,6 @@ require ( gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/cli-runtime v0.30.3 // indirect - k8s.io/client-go v0.31.3 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect diff --git a/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/machinehealthcheckunterminatedshortcircuitsre.go b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/machinehealthcheckunterminatedshortcircuitsre.go new file mode 100644 index 00000000..c4b5af4a --- /dev/null +++ b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/machinehealthcheckunterminatedshortcircuitsre.go @@ -0,0 +1,329 @@ +/* +machinehealthcheckunterminatedshortcircuitsre defines the investigation logic for the MachineHealthCheckUnterminatedShortCircuitSRE alert +*/ +package machinehealthcheckunterminatedshortcircuitsre + +import ( + "context" + "errors" + "fmt" + "slices" + "strings" + "time" + + machinev1beta1 
"github.com/openshift/api/machine/v1beta1" + + "github.com/openshift/configuration-anomaly-detection/pkg/investigations/investigation" + machineutil "github.com/openshift/configuration-anomaly-detection/pkg/investigations/utils/machine" + nodeutil "github.com/openshift/configuration-anomaly-detection/pkg/investigations/utils/node" + k8sclient "github.com/openshift/configuration-anomaly-detection/pkg/k8s" + "github.com/openshift/configuration-anomaly-detection/pkg/logging" + "github.com/openshift/configuration-anomaly-detection/pkg/notewriter" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + alertname = "MachineHealthCheckUnterminatedShortCircuitSRE" + // remediationName must match the name of this investigation's directory, so it can be looked up via the backplane-api + remediationName = "machineHealthCheckUnterminatedShortCircuitSRE" +) + +type Investigation struct { + // kclient provides access to on-cluster resources + kclient client.Client + // notes holds the messages that will be shared with Primary upon completion + notes *notewriter.NoteWriter + // recommendations holds the set of actions CAD recommends primary to take + recommendations investigationRecommendations +} + +func (i *Investigation) setup(r *investigation.Resources) error { + // Setup investigation + k, err := k8sclient.New(r.Cluster.ID(), r.OcmClient, remediationName) + if err != nil { + return fmt.Errorf("failed to initialize kubernetes client: %w", err) + } + i.kclient = k + i.notes = notewriter.New(r.Name, logging.RawLogger) + i.recommendations = investigationRecommendations{} + + return nil +} + +// Run investigates the MachineHealthCheckUnterminatedShortCircuitSRE alert +// +// The investigation seeks to provide exactly one recommended action per affected machine/node pair. +// The machine object is evaluated first, as it represents a lower-level object that could affect the health of the node. 
+// If the investigation determines that the breakage is occurring at the machine-level, the corresponding node is *not* investigated. +// After investigating all affected machines, potentially affected nodes are investigated. +func (i *Investigation) Run(r *investigation.Resources) (investigation.InvestigationResult, error) { + ctx := context.Background() + result := investigation.InvestigationResult{} + + // Setup & teardown + err := i.setup(r) + if err != nil { + return result, fmt.Errorf("failed to setup investigation: %w", err) + } + defer func(r *investigation.Resources) { + err := k8sclient.Cleanup(r.Cluster.ID(), r.OcmClient, remediationName) + if err != nil { + logging.Errorf("failed to cleanup investigation: %v", err) + } + }(r) + + targetMachines, err := i.getMachinesFromFailingMHC(ctx) + if err != nil { + i.notes.AppendWarning("failed to retrieve one or more target machines: %v", err) + } + if len(targetMachines) == 0 { + i.notes.AppendWarning("no machines found for short-circuited machinehealthcheck objects") + return result, r.PdClient.EscalateIncidentWithNote(i.notes.String()) + } + + problemMachines, err := i.InvestigateMachines(ctx, targetMachines) + if err != nil { + i.notes.AppendWarning("failed to investigate machines: %v", err) + } + + // Trim out the machines that we've already investigated and know have problems + + // + + // The purpose of this is to avoid re-investigating nodes whose machines were already investigated.
Any node-level issue on a failing machine + // is most likely related to the machine itself, and providing duplicate/conflicting advice will only prove confusing to the responder + targetMachines = slices.DeleteFunc(targetMachines, func(targetMachine machinev1beta1.Machine) bool { + for _, problemMachine := range problemMachines { + if problemMachine.Name == targetMachine.Name { + return true + } + } + return false + }) + + // If one or more machines managed by the machinehealthcheck have not yet been identified as a problem, check on the machine's + // node to determine if there are node-level problems that need remediating + if len(targetMachines) > 0 { + targetNodes, err := machineutil.GetNodesForMachines(ctx, i.kclient, targetMachines) + if err != nil { + i.notes.AppendWarning("failed to retrieve one or more target nodes: %v", err) + } + if len(targetNodes) > 0 { + i.InvestigateNodes(targetNodes) + } + } + + // Summarize recommendations from investigation in PD notes, if any found + if len(i.recommendations) > 0 { + i.notes.AppendWarning(i.recommendations.summarize()) + } else { + i.notes.AppendSuccess("no recommended actions to take against cluster") + } + + return result, r.PdClient.EscalateIncidentWithNote(i.notes.String()) +} + +func (i *Investigation) getMachinesFromFailingMHC(ctx context.Context) ([]machinev1beta1.Machine, error) { + healthchecks := machinev1beta1.MachineHealthCheckList{} + err := i.kclient.List(ctx, &healthchecks, &client.ListOptions{Namespace: machineutil.MachineNamespace}) + if err != nil { + return []machinev1beta1.Machine{}, fmt.Errorf("failed to retrieve machinehealthchecks from cluster: %w", err) + } + + targets := []machinev1beta1.Machine{} + for _, healthcheck := range healthchecks.Items { + if !machineutil.HealthcheckRemediationAllowed(healthcheck) { + machines, err := machineutil.GetMachinesForMHC(ctx, i.kclient, healthcheck) + if err != nil { + i.notes.AppendWarning("failed to retrieve machines from machinehealthcheck %q: %v", 
healthcheck.Name, err) + continue + } + targets = append(targets, machines...) + } + } + + return targets, nil +} + +// InvestigateMachines evaluates the state of the machines in the cluster and returns a list of the failing machines, along with a categorized set of recommendations based on the failure state of +// each machine +func (i *Investigation) InvestigateMachines(ctx context.Context, targets []machinev1beta1.Machine) ([]machinev1beta1.Machine, error) { + investigatedMachines := []machinev1beta1.Machine{} + var errs error + for _, machine := range targets { + if machine.DeletionTimestamp != nil { + err := i.investigateDeletingMachine(ctx, machine) + investigatedMachines = append(investigatedMachines, machine) + if err != nil { + errs = errors.Join(errs, fmt.Errorf("failed to investigate deleting machine: %w", err)) + } + continue + } + + if (machine.Status.Phase != nil && *machine.Status.Phase == machinev1beta1.PhaseFailed) || machine.Status.ErrorReason != nil { + err := i.investigateFailingMachine(machine) + investigatedMachines = append(investigatedMachines, machine) + if err != nil { + errs = errors.Join(errs, fmt.Errorf("failed to investigate failing machine: %w", err)) + } + } + } + + if len(investigatedMachines) == 0 { + i.notes.AppendSuccess("no deleting or Failed machines found") + } + return investigatedMachines, errs +} + +// investigateFailingMachine evaluates a machine whose .Status.Phase is failed, and provides a recommendation based on the cause of the failure +func (i *Investigation) investigateFailingMachine(machine machinev1beta1.Machine) error { + role, err := machineutil.GetRole(machine) + if err != nil { + // Failing to determine whether a machine is Red Hat-managed warrants manual investigation + notes := fmt.Sprintf("unable to determine machine role: %v", err) + i.recommendations.addRecommendation(recommendationInvestigateMachine, machine.Name, notes) + return fmt.Errorf("failed to determine role for machine %q: %w", machine.Name, err)
+ } + + // Safely dereference status fields to avoid panics in cases where machines' statuses haven't been fully populated + var errorMsg string + if machine.Status.ErrorMessage != nil { + errorMsg = *machine.Status.ErrorMessage + } + + var errorReason machinev1beta1.MachineStatusError + if machine.Status.ErrorReason != nil { + errorReason = *machine.Status.ErrorReason + } + + if role != machineutil.WorkerRoleLabelValue { + // If machine is Red-Hat owned, always require manual investigation + notes := fmt.Sprintf("Red Hat-owned machine in state %q due to %q", errorReason, errorMsg) + i.recommendations.addRecommendation(recommendationInvestigateMachine, machine.Name, notes) + return nil + } + + switch errorReason { + case machinev1beta1.IPAddressInvalidReason: + notes := fmt.Sprintf("invalid IP address: %q. Deleting machine may allow the cloud provider to assign a valid IP address", errorMsg) + i.recommendations.addRecommendation(recommendationDeleteMachine, machine.Name, notes) + + case machinev1beta1.CreateMachineError: + notes := fmt.Sprintf("machine failed to create: %q. Deleting machine may resolve any transient issues with the cloud provider", errorMsg) + i.recommendations.addRecommendation(recommendationDeleteMachine, machine.Name, notes) + + case machinev1beta1.InvalidConfigurationMachineError: + notes := fmt.Sprintf("the machine configuration is invalid: %q. 
Checking splunk audit logs may indicate whether the customer has modified the machine or its machineset", errorMsg) + i.recommendations.addRecommendation(recommendationInvestigateMachine, machine.Name, notes) + + case machinev1beta1.DeleteMachineError: + notes := fmt.Sprintf("the machine's node could not be gracefully terminated automatically: %q", errorMsg) + i.recommendations.addRecommendation(recommendationInvestigateMachine, machine.Name, notes) + + case machinev1beta1.InsufficientResourcesMachineError: + notes := fmt.Sprintf("a servicelog should be sent because there is insufficient quota to provision the machine: %q", errorMsg) + i.recommendations.addRecommendation(recommendationQuotaServiceLog, machine.Name, notes) + + default: + notes := "no .Status.ErrorReason found for machine" + i.recommendations.addRecommendation(recommendationInvestigateMachine, machine.Name, notes) + } + return nil +} + +// investigateDeletingMachine evaluates a machine which is being deleted, to determine if & why it is blocked, along with a recommendation on how to unblock it +func (i *Investigation) investigateDeletingMachine(ctx context.Context, machine machinev1beta1.Machine) error { + if machine.Status.NodeRef == nil { + notes := "machine stuck deleting with no node" + i.recommendations.addRecommendation(recommendationInvestigateMachine, machine.Name, notes) + return nil + } + node, err := machineutil.GetNodeForMachine(ctx, i.kclient, machine) + if err != nil { + notes := "machine's node could not be determined" + i.recommendations.addRecommendation(recommendationInvestigateMachine, machine.Name, notes) + return fmt.Errorf("failed to retrieve node for machine %q: %w", machine.Name, err) + } + + stuck, duration := checkForStuckDrain(node) + if stuck { + notes := fmt.Sprintf("node %q found to be stuck draining for %s", node.Name, duration.Truncate(time.Second).String()) + i.recommendations.addRecommendation(recommendationInvestigateNode, node.Name, notes) + return nil + } +
+ notes := "unable to determine why machine is stuck deleting" + i.recommendations.addRecommendation(recommendationInvestigateMachine, machine.Name, notes) + return nil +} + +// checkForStuckDrain makes a best-effort approximation at whether a node is stuck draining a specific pod +func checkForStuckDrain(node corev1.Node) (bool, *time.Duration) { + if len(node.Spec.Taints) == 0 { + return false, nil + } + + taint, found := nodeutil.FindNoScheduleTaint(node) + if !found { + return false, nil + } + + // TODO - Once CAD can access on-cluster metrics, we can query the `pods_preventing_node_drain` metric from prometheus + // to more accurately gauge if a node is truly stuck deleting, and what pod is causing it + drainDuration := time.Since(taint.TimeAdded.Time) + if drainDuration > 10*time.Minute { + return true, &drainDuration + } + + return false, nil +} + +// InvestigateNodes examines the provided nodes and returns recommended actions, if any are needed +func (i *Investigation) InvestigateNodes(nodes []corev1.Node) { + for _, node := range nodes { + i.InvestigateNode(node) + } +} + +// InvestigateNode examines a node and determines if further investigation is required +func (i *Investigation) InvestigateNode(node corev1.Node) { + roleLabel, found := nodeutil.GetRole(node) + if !found { + notes := fmt.Sprintf("no role label containing %q found for node", nodeutil.RoleLabelPrefix) + i.recommendations.addRecommendation(recommendationInvestigateNode, node.Name, notes) + return + } else if !strings.Contains(roleLabel, nodeutil.WorkerRoleSuffix) { + notes := "non-worker node affected" + i.recommendations.addRecommendation(recommendationInvestigateNode, node.Name, notes) + return + } + + ready, found := nodeutil.FindReadyCondition(node) + if !found { + notes := "found no 'Ready' .Status.Condition for the node" + i.recommendations.addRecommendation(recommendationInvestigateNode, node.Name, notes) + return + } + + if ready.Status != corev1.ConditionTrue { + 
lastCheckinElapsed := time.Since(ready.LastHeartbeatTime.Time).Truncate(time.Second) + notes := fmt.Sprintf("node has been %q for %s", ready.Status, lastCheckinElapsed) + i.recommendations.addRecommendation(recommendationInvestigateNode, node.Name, notes) + } +} + +func (i *Investigation) Name() string { + return alertname +} + +func (i *Investigation) Description() string { + return fmt.Sprintf("Investigates '%s' alerts", alertname) +} + +func (i *Investigation) IsExperimental() bool { + return true +} + +func (i *Investigation) ShouldInvestigateAlert(alert string) bool { + return strings.Contains(alert, alertname) +} diff --git a/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/machinehealthcheckunterminatedshortcircuitsre_test.go b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/machinehealthcheckunterminatedshortcircuitsre_test.go new file mode 100644 index 00000000..4016f327 --- /dev/null +++ b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/machinehealthcheckunterminatedshortcircuitsre_test.go @@ -0,0 +1,573 @@ +package machinehealthcheckunterminatedshortcircuitsre + +import ( + "context" + "fmt" + "slices" + "strings" + "testing" + "time" + + machineutil "github.com/openshift/configuration-anomaly-detection/pkg/investigations/utils/machine" + nodeutil "github.com/openshift/configuration-anomaly-detection/pkg/investigations/utils/node" + "github.com/openshift/configuration-anomaly-detection/pkg/logging" + "github.com/openshift/configuration-anomaly-detection/pkg/notewriter" + + machinev1beta1 "github.com/openshift/api/machine/v1beta1" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestInvestigation_getMachinesFromFailingMHC(t *testing.T) { + // Test objects + mhc := newFailingMachineHealthCheck() + + mhcOwnedMachine1 := 
newWorkerMachine("mhc-owned-1") + mhcOwnedMachine2 := newWorkerMachine("mhc-owned-2") + irrelevantMachine := newWorkerMachine("irrelevant") + // clear the labels from this machine so that it's no longer owned by the mhc + irrelevantMachine.Labels = map[string]string{} + + tests := []struct { + name string + objects []client.Object + want []string + }{ + // test cases + { + name: "one valid machine", + objects: []client.Object{ + mhc, + mhcOwnedMachine1, + }, + want: []string{ + mhcOwnedMachine1.Name, + }, + }, + { + name: "no valid machines", + objects: []client.Object{ + mhc, + }, + want: []string{}, + }, + { + name: "two valid machines", + objects: []client.Object{ + mhc, + mhcOwnedMachine1, + mhcOwnedMachine2, + }, + want: []string{ + mhcOwnedMachine1.Name, + mhcOwnedMachine2.Name, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Setup test + i, err := newTestInvestigation(tt.objects...) + if err != nil { + t.Errorf("failed to create test investigation: %v", err) + return + } + + // Execute test + got, err := i.getMachinesFromFailingMHC(context.TODO()) + if err != nil { + // Never expect error on this test + t.Errorf("Investigation.getTargetMachines() error = %v, expected no error", err) + return + } + + // Check results + for _, wantedMachineName := range tt.want { + found := slices.ContainsFunc(got, func(gotMachine machinev1beta1.Machine) bool { + return gotMachine.Name == wantedMachineName + }) + if !found { + t.Errorf("expected machine %q in returned list. 
Got the following instead: %#v", wantedMachineName, got) + } + } + }) + } +} + +func TestInvestigation_investigateFailingMachine(t *testing.T) { + // Test cases + type result struct { + err bool + action recommendedAction + notes string + } + + tests := []struct { + name string + machine func() *machinev1beta1.Machine + want result + }{ + { + name: "investigate when no role", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("role-less") + // clear the machine's labels to unassign it from the worker role + machine.Labels = map[string]string{} + return machine + }, + want: result{ + err: true, + action: recommendationInvestigateMachine, + notes: "unable to determine machine role", + }, + }, + { + name: "investigate when infra failing", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("infra") + machine.Labels[machineutil.RoleLabelKey] = "infra" + return machine + }, + want: result{ + err: false, + action: recommendationInvestigateMachine, + notes: "Red Hat-owned machine in state", + }, + }, + { + name: "delete when invalid IP", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("bad-ip") + reason := machinev1beta1.IPAddressInvalidReason + machine.Status.ErrorReason = &reason + return machine + }, + want: result{ + err: false, + action: recommendationDeleteMachine, + notes: "invalid IP address", + }, + }, + { + name: "delete when create failed", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("create-failed") + reason := machinev1beta1.CreateMachineError + machine.Status.ErrorReason = &reason + return machine + }, + want: result{ + err: false, + action: recommendationDeleteMachine, + notes: "machine failed to create", + }, + }, + { + name: "investigate when invalid configuration", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("invalid-config") + reason := machinev1beta1.InvalidConfigurationMachineError + machine.Status.ErrorReason = &reason + 
return machine + }, + want: result{ + err: false, + action: recommendationInvestigateMachine, + notes: "machine configuration is invalid", + }, + }, + { + name: "investigate when delete failed", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("delete-failed") + reason := machinev1beta1.DeleteMachineError + machine.Status.ErrorReason = &reason + return machine + }, + want: result{ + err: false, + action: recommendationInvestigateMachine, + notes: "the machine's node could not be gracefully terminated", + }, + }, + { + name: "servicelog when insufficient resources", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("insufficient-resources") + reason := machinev1beta1.InsufficientResourcesMachineError + machine.Status.ErrorReason = &reason + return machine + }, + want: result{ + err: false, + action: recommendationQuotaServiceLog, + notes: "a servicelog should be sent", + }, + }, + { + name: "investigate when missing .Status.ErrorReason", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("no-status") + machine.Status = machinev1beta1.MachineStatus{} + return machine + }, + want: result{ + err: false, + action: recommendationInvestigateMachine, + notes: "no .Status.ErrorReason found for machine", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Setup test + machine := tt.machine() + i, err := newTestInvestigation(machine) + if err != nil { + t.Errorf("failed to create test investigation: %v", err) + } + + // Execute test + err = i.investigateFailingMachine(*machine) + + // Verify results + if (err != nil) != tt.want.err { + t.Errorf("unexpected result in Investigation.investigateFailingMachine(): wanted error = %t, returned err = %v", tt.want.err, err) + } + + recs := i.recommendations[tt.want.action] + if len(recs) != 1 { + t.Errorf("expected exactly one recommendation: %q, got %d", tt.want.action, len(recs)) + } + if !strings.Contains(recs[0].notes, 
tt.want.notes) { + t.Errorf("unexpected note in investigation recommendation: expected %q, got %q", tt.want.notes, recs[0].notes) + } + }) + } +} + +func TestInvestigation_investigateDeletingMachine(t *testing.T) { + type result struct { + err bool + action recommendedAction + notes string + } + + tests := []struct { + name string + objects func() (*machinev1beta1.Machine, *corev1.Node) + want result + }{ + { + name: "investigate when no .Status.NodeRef", + objects: func() (*machinev1beta1.Machine, *corev1.Node) { + machine := newWorkerMachine("no-noderef") + machine.Status = machinev1beta1.MachineStatus{} + return machine, &corev1.Node{} + }, + want: result{ + err: false, + action: recommendationInvestigateMachine, + notes: "machine stuck deleting with no node", + }, + }, + { + name: "investigate when no node for machine", + objects: func() (*machinev1beta1.Machine, *corev1.Node) { + machine := newWorkerMachine("lonely") + machine.Status.NodeRef = &corev1.ObjectReference{ + Kind: "Node", + Name: "missing", + } + return machine, &corev1.Node{} + }, + want: result{ + err: true, + action: recommendationInvestigateMachine, + notes: "machine's node could not be determined", + }, + }, + { + name: "investigate when machine deletion reason unknown", + objects: func() (*machinev1beta1.Machine, *corev1.Node) { + machine := newWorkerMachine("stuck-deleting") + node := newWorkerNode("stuck-deleting") + machine.Status.NodeRef = &corev1.ObjectReference{ + Kind: node.Kind, + Name: node.Name, + } + return machine, node + }, + want: result{ + err: false, + action: recommendationInvestigateMachine, + notes: "unable to determine why machine is stuck deleting", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Setup test + machine, node := tt.objects() + i, err := newTestInvestigation(machine, node) + if err != nil { + t.Errorf("failed to create test investigation: %v", err) + } + + // Execute test + err = 
i.investigateDeletingMachine(context.TODO(), *machine) + + // Verify results + if (err != nil) != tt.want.err { + t.Errorf("unexpected result in Investigation.investigateDeletingMachine(): wanted error = %t, returned err = %v", tt.want.err, err) + } + + recs := i.recommendations[tt.want.action] + if len(recs) != 1 { + t.Errorf("expected exactly one recommendation: %q, got %d", tt.want.action, len(recs)) + } + if !strings.Contains(recs[0].notes, tt.want.notes) { + t.Errorf("unexpected note in investigation recommendation: expected %q, got %q", tt.want.notes, recs[0].notes) + } + }) + } +} + +func Test_checkForStuckDrain(t *testing.T) { + tests := []struct { + name string + node func() corev1.Node + stuck bool + }{ + { + name: "not stuck when no taints", + node: func() corev1.Node { + node := newWorkerNode("taintless") + node.Spec.Taints = []corev1.Taint{} + return *node + }, + stuck: false, + }, + { + name: "not stuck when no NoSchedule taint", + node: func() corev1.Node { + node := newWorkerNode("scheduleable") + node.Spec.Taints = []corev1.Taint{ + { + Effect: corev1.TaintEffectNoExecute, + }, + { + Effect: corev1.TaintEffectPreferNoSchedule, + }, + } + return *node + }, + stuck: false, + }, + { + name: "not stuck when short-lived NoSchedule taint", + node: func() corev1.Node { + node := newWorkerNode("recently-unscheduleable") + node.Spec.Taints = []corev1.Taint{ + { + Effect: corev1.TaintEffectNoSchedule, + TimeAdded: &metav1.Time{ + Time: time.Now(), + }, + }, + } + return *node + }, + stuck: false, + }, + { + name: "stuck when long-lived NoSchedule taint", + node: func() corev1.Node { + node := newWorkerNode("actually-stuck") + node.Spec.Taints = []corev1.Taint{ + { + Effect: corev1.TaintEffectNoSchedule, + TimeAdded: &metav1.Time{ + Time: time.Now().Add(-1 * time.Hour), + }, + }, + } + return *node + }, + stuck: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, duration := checkForStuckDrain(tt.node()) + if got != 
tt.stuck { + t.Errorf("unexpected result: expected stuck = %t, got %t", tt.stuck, got) + } + if got && duration == nil { + t.Errorf("unexpected result: got stuck = %t, but duration = %v", got, duration) + } + }) + } +} + +func TestInvestigation_InvestigateNode(t *testing.T) { + type result struct { + action recommendedAction + notes string + } + + tests := []struct { + name string + node func() *corev1.Node + want result + }{ + { + name: "investigate when no role label", + node: func() *corev1.Node { + node := newWorkerNode("labelless") + node.Labels = map[string]string{} + return node + }, + want: result{ + action: recommendationInvestigateNode, + notes: "no role label", + }, + }, + { + name: "investigate when bad role label", + node: func() *corev1.Node { + node := newWorkerNode("infra") + node.Labels = map[string]string{fmt.Sprintf("%s/infra", nodeutil.RoleLabelPrefix): ""} + return node + }, + want: result{ + action: recommendationInvestigateNode, + notes: "non-worker node affected", + }, + }, + { + name: "investigate when no Ready condition", + node: func() *corev1.Node { + node := newWorkerNode("unReady") + node.Status.Conditions = []corev1.NodeCondition{} + return node + }, + want: result{ + action: recommendationInvestigateNode, + notes: "found no 'Ready' .Status.Condition", + }, + }, + { + name: "investigate when not Ready", + node: func() *corev1.Node { + node := newWorkerNode("notReady") + node.Status.Conditions = []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionFalse, + }, + } + return node + }, + want: result{ + action: recommendationInvestigateNode, + notes: "node has been", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Setup test + node := tt.node() + i, err := newTestInvestigation(node) + if err != nil { + t.Errorf("failed to create new test investigation: %v", err) + } + + // Execute test + i.InvestigateNode(*node) + + // Validate results + recs := 
i.recommendations[tt.want.action] + if len(recs) != 1 { + t.Errorf("expected exactly one recommendation: %q, got %d", tt.want.action, len(recs)) + } + if !strings.Contains(recs[0].notes, tt.want.notes) { + t.Errorf("unexpected note in investigation recommendation: expected %q, got %q", tt.want.notes, recs[0].notes) + } + }) + } +} + +func newFailingMachineHealthCheck() *machinev1beta1.MachineHealthCheck { + mhc := &machinev1beta1.MachineHealthCheck{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-mhc", + Namespace: machineutil.MachineNamespace, + }, + Spec: machinev1beta1.MachineHealthCheckSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{"mhc-name": "test-mhc"}, + }, + }, + Status: machinev1beta1.MachineHealthCheckStatus{ + Conditions: []machinev1beta1.Condition{ + { + Type: machinev1beta1.RemediationAllowedCondition, + Status: corev1.ConditionFalse, + }, + }, + }, + } + return mhc +} + +func newWorkerMachine(name string) *machinev1beta1.Machine { + m := &machinev1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: machineutil.MachineNamespace, + Labels: map[string]string{"mhc-name": "test-mhc", machineutil.RoleLabelKey: machineutil.WorkerRoleLabelValue}, + }, + } + return m +} + +func newWorkerNode(name string) *corev1.Node { + n := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{fmt.Sprintf("%s/%s", nodeutil.RoleLabelPrefix, nodeutil.WorkerRoleSuffix): ""}, + }, + } + return n +} + +func newFakeClient(objs ...client.Object) (client.Client, error) { + s := scheme.Scheme + err := machinev1beta1.AddToScheme(s) + if err != nil { + return nil, err + } + + client := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() + return client, nil +} + +func newTestInvestigation(testObjects ...client.Object) (Investigation, error) { + fakeClient, err := newFakeClient(testObjects...) 
+ if err != nil { + return Investigation{}, err + } + + i := Investigation{ + kclient: fakeClient, + notes: notewriter.New("testing", logging.RawLogger), + recommendations: investigationRecommendations{}, + } + return i, nil +} diff --git a/pkg/investigations/machineHealthCheckUnterminatedShortCircuitSRE/metadata.yaml b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/metadata.yaml similarity index 94% rename from pkg/investigations/machineHealthCheckUnterminatedShortCircuitSRE/metadata.yaml rename to pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/metadata.yaml index 58056751..b8ec447e 100644 --- a/pkg/investigations/machineHealthCheckUnterminatedShortCircuitSRE/metadata.yaml +++ b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/metadata.yaml @@ -19,4 +19,4 @@ rbac: - "" resources: - "nodes" -customerDataAccess: true +customerDataAccess: false diff --git a/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/recommendation.go b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/recommendation.go new file mode 100644 index 00000000..467566b9 --- /dev/null +++ b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/recommendation.go @@ -0,0 +1,73 @@ +/* +machinehealthcheckunterminatedshortcircuitsre defines the investigation logic for the MachineHealthCheckUnterminatedShortCircuitSRE alert +*/ +package machinehealthcheckunterminatedshortcircuitsre + +import ( + "fmt" + + "github.com/openshift/configuration-anomaly-detection/pkg/investigations/utils/machine" +) + +// machineRecommendations categorizes each machine's individual investigation summary into a recommended course of action +type investigationRecommendations map[recommendedAction][]investigationResult + +func (r investigationRecommendations) addRecommendation(action recommendedAction, object string, notes string) { + recommendation := investigationResult{ + object: object, + notes: notes, + } + r[action] = 
append(r[action], recommendation) +} + +// summarize prints the machine investigationRecommendations into a human-readable format. +func (r investigationRecommendations) summarize() string { + msg := "" + for recommendation, investigations := range r { + msg += fmt.Sprintf("%s:\n", recommendation) + + if recommendation == recommendationDeleteMachine { + // Consolidate all machine deletion requests into a single oc command for ease of use + deleteCmd := fmt.Sprintf("oc delete machine -n %s", machine.MachineNamespace) + for _, investigation := range investigations { + msg += fmt.Sprintf("- %s\n", investigation.String()) + deleteCmd += " " + investigation.object + } + msg += fmt.Sprintf("to delete these machines, run:\n\n%s\n", deleteCmd) + } else { + for _, investigation := range investigations { + msg += fmt.Sprintf("- %s\n", investigation.String()) + } + } + + msg += "\n" + } + return msg +} + +type investigationResult struct { + // object indicates which object was investigated + object string + // notes provides a high-level summary of the investigation results + notes string +} + +func (s *investigationResult) String() string { + msg := fmt.Sprintf("%q: %s", s.object, s.notes) + return msg +} + +// recommendedAction acts as both a key in the investigationRecommendations map, as well as a header for pagerduty notes when summarize()-ing +type recommendedAction string + +const ( + // recommendationDeleteMachine indicates that the machine(s) in question should be deleted so the machine-api can reprovision them + recommendationDeleteMachine recommendedAction = "delete the following machines" + // recommendationInvestigateMachine indicates that the machine(s) in question need to be manually investigated + recommendationInvestigateMachine recommendedAction = "investigate the following machines" + // recommendationQuotaServiceLog indicates that the machine(s) in question need to be remediated by the customer, and SRE should notify them + // of that fact via servicelog + 
recommendationQuotaServiceLog recommendedAction = "send a service log regarding quota issues for the following machines" + // recommendationInvestigateNode indicates that the machine's node object is reporting problems which require human intervention to resolve + recommendationInvestigateNode recommendedAction = "investigate the following nodes" +) diff --git a/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/README.md b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/README.md new file mode 100644 index 00000000..bb0846c0 --- /dev/null +++ b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/README.md @@ -0,0 +1,83 @@ +# Testing MachineHealthCheckUnterminatedShortCircuitSRE + +The `MachineHealthCheckUnterminatedShortCircuitSRE` alert is derived from the `MachineHealthCheck` objects in the `openshift-machine-api` namespace on the cluster. Specifically, it is triggered when the number of nodes matching one of the `.spec.unhealthyConditions` meets or exceeds the `.spec.maxUnhealthy` value for a [duration of time](https://github.com/openshift/managed-cluster-config/blob/3338dd375fa6517d7768eca985c3ca115bbc1484/deploy/sre-prometheus/100-machine-health-check-unterminated-short-circuit.PrometheusRule.yaml#L16). + + +## Setup + +Before applying any test configuration, be sure to [pause hive syncsetting](https://github.com/openshift/ops-sop/blob/master/v4/knowledge_base/pause-syncset.md), to avoid having these changes overwritten mid-test. 
+ +Then, apply the following changes to the `MachineHealthCheck` object(s) you wish to check against, in order to make testing a little easier: +- Reducing the `.spec.maxUnhealthy` value will lower the number of nodes that need to be "broken" to short-circuit the machine-api-operator and halt remediation +- Reducing the `.spec.unhealthyConditions` timeouts ensures that the machine-api short-circuits much more quickly after modifying the nodes + +A patched version of the default `MachineHealthCheck/srep-worker-healthcheck` object is pre-configured [here](./srep-worker-healthcheck_machinehealthcheck.yaml). Use the following command to apply it to your test cluster: + +```sh +ocm backplane elevate "testing CAD" -- replace -f ./srep-worker-healthcheck_machinehealthcheck.yaml +``` + +## Test Cases + +Because it has a fairly broad definition, a `MachineHealthCheckUnterminatedShortCircuitSRE` alert could fire as a result of several different scenarios. A few are outlined below, along with methods to reproduce them. + +### nodes `NotReady`, machines `Running` + +While the `machine-api-operator` owns and operates the `machine` object-type, it's important to note that its `MachineHealthCheck` objects actually utilize the `.status` of the corresponding **node** to determine if a machine is healthy. This is because a machine's status only reflects whether the VM in the cloud provider is running or not, while the node's status indicates whether the instance is a functional part of the cluster. Therefore, it's possible for a `MachineHealthCheckUnterminatedShortCircuitSRE` alert to fire while all `machines` have a `.status.phase` of `Running`. + +The simplest way to reproduce this is to log in and stop the `kubelet.service` on multiple nodes at once. 
+ + +This can be done via the debug command: + +```sh +ocm backplane elevate "testing CAD" -- debug node/$NODE +``` + +inside the container, run: + +```sh +chroot /host +systemctl stop kubelet.service +``` + +This should automatically remove the debug pod. The node status should flip to `NotReady` shortly thereafter. + +### Nodes stuck deleting due to customer workloads not draining + +Components like the cluster-autoscaler will try to automatically manage cluster machines via scale-up/scale-down actions based on overall resource utilization. If workloads cannot be drained during a scale-down operation, several nodes can get stuck in a `SchedulingDisabled` phase concurrently, triggering the alert. + +To simulate this, create a pod that will have to be drained prior to deleting the machine, alongside an (incorrectly configured) PDB that will prevent it from being drained: + +```sh +ocm backplane elevate "testing CAD" -- create -f ./unstoppable_workload.yaml -f ./unstoppable_pdb.yaml +``` + +Next, simulate a scale-down by patching the machineset the pod is running on. *NOTE*: Just deleting the machine will result in it being replaced, and will not trigger a MachineHealthCheckUnterminatedShortCircuitSRE alert. +```sh +# Get pod's node +NODE=$(oc get po -n default -l app=test-cad -o jsonpath="{.items[].spec.nodeName}") +# Get node's machineset +MACHINESET=$(oc get machines -A -o json | jq --arg NODE "${NODE}" -r '.items[] | select(.status.nodeRef.name == $NODE) | .metadata.labels["machine.openshift.io/cluster-api-machineset"]') +# Scale node's machineset +oc scale machineset/${MACHINESET} --replicas=0 -n openshift-machine-api +``` + +### Machines in `Failed` phase + +Having several machines in a `Failed` state still violates a `MachineHealthCheck`'s `.spec.maxUnhealthy`, despite the machines not having any corresponding nodes to check against. + +One method to simulate this is to edit the machineset so it contains invalid configurations. 
The following patch updates a worker machineset to use the `fakeinstancetype` machine-type, for example: + +```sh +ocm backplane elevate "testing CAD" -- patch machinesets $MACHINESET -n openshift-machine-api --type merge -p '{"spec": {"template": {"spec": {"providerSpec": {"value": {"instanceType": "fakeinstancetype"}}}}}}' +oc delete machine -n openshift-machine-api -l machine.openshift.io/cluster-api-machineset=$MACHINESET +``` + +## Additional Resources +The following pages may be useful if the information in this guide is insufficient or has become stale. + +- [Machine API Brief](https://github.com/openshift/machine-api-operator/blob/main/docs/user/machine-api-operator-overview.md) +- [Machine API FAQ](https://github.com/openshift/machine-api-operator/blob/main/FAQ.md) +- [MachineHealthCheck documentation](https://docs.redhat.com/en/documentation/openshift_container_platform/4.17/html/machine_management/deploying-machine-health-checks#machine-health-checks-resource_deploying-machine-health-checks) +- [Alert SOP](https://github.com/openshift/ops-sop/blob/master/v4/alerts/MachineHealthCheckUnterminatedShortCircuitSRE.md) diff --git a/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/srep-worker-healthcheck_machinehealthcheck.yaml b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/srep-worker-healthcheck_machinehealthcheck.yaml new file mode 100644 index 00000000..9fd7da3b --- /dev/null +++ b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/srep-worker-healthcheck_machinehealthcheck.yaml @@ -0,0 +1,53 @@ +apiVersion: machine.openshift.io/v1beta1 +kind: MachineHealthCheck +metadata: + name: srep-worker-healthcheck + namespace: openshift-machine-api +spec: + maxUnhealthy: 0 + nodeStartupTimeout: 25m + selector: + matchExpressions: + - key: machine.openshift.io/cluster-api-machine-role + operator: NotIn + values: + - infra + - master + - key: machine.openshift.io/cluster-api-machineset + 
operator: Exists + - key: machine.openshift.io/instance-type + operator: NotIn + values: + - m5.metal + - m5d.metal + - m5n.metal + - m5dn.metal + - m5zn.metal + - m6a.metal + - m6i.metal + - m6id.metal + - r5.metal + - r5d.metal + - r5n.metal + - r5dn.metal + - r6a.metal + - r6i.metal + - r6id.metal + - x2iezn.metal + - z1d.metal + - c5.metal + - c5d.metal + - c5n.metal + - c6a.metal + - c6i.metal + - c6id.metal + - i3.metal + - i3en.metal + - r7i.48xlarge + unhealthyConditions: + - status: "False" + timeout: 10s + type: Ready + - status: Unknown + timeout: 10s + type: Ready diff --git a/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/unstoppable_pdb.yaml b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/unstoppable_pdb.yaml new file mode 100644 index 00000000..13d4a4d4 --- /dev/null +++ b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/unstoppable_pdb.yaml @@ -0,0 +1,11 @@ +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: test-cad + namespace: default +spec: + maxUnavailable: 0 + selector: + matchLabels: + app: "test-cad" + unhealthyPodEvictionPolicy: AlwaysAllow diff --git a/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/unstoppable_workload.yaml b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/unstoppable_workload.yaml new file mode 100644 index 00000000..9d7d3af8 --- /dev/null +++ b/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/testing/unstoppable_workload.yaml @@ -0,0 +1,33 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: "test-cad" + name: test-cad + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: "test-cad" + template: + metadata: + labels: + app: "test-cad" + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + weight: 1 
+ containers: + - command: + - "sleep" + - "infinity" + image: "quay.io/app-sre/ubi8-ubi:latest" + imagePullPolicy: IfNotPresent + name: test + restartPolicy: Always diff --git a/pkg/investigations/registry.go b/pkg/investigations/registry.go index a278b40b..bc0402bd 100644 --- a/pkg/investigations/registry.go +++ b/pkg/investigations/registry.go @@ -7,6 +7,7 @@ import ( "github.com/openshift/configuration-anomaly-detection/pkg/investigations/cpd" "github.com/openshift/configuration-anomaly-detection/pkg/investigations/insightsoperatordown" "github.com/openshift/configuration-anomaly-detection/pkg/investigations/investigation" + "github.com/openshift/configuration-anomaly-detection/pkg/investigations/machinehealthcheckunterminatedshortcircuitsre" "github.com/openshift/configuration-anomaly-detection/pkg/investigations/upgradeconfigsyncfailureover4hr" ) @@ -18,6 +19,7 @@ var availableInvestigations = []investigation.Investigation{ &cpd.Investigation{}, &insightsoperatordown.Investigation{}, &upgradeconfigsyncfailureover4hr.Investigation{}, + &machinehealthcheckunterminatedshortcircuitsre.Investigation{}, } // GetInvestigation returns the first Investigation that applies to the given alert title. 
diff --git a/pkg/investigations/utils/machine/machine.go b/pkg/investigations/utils/machine/machine.go new file mode 100644 index 00000000..079fb116 --- /dev/null +++ b/pkg/investigations/utils/machine/machine.go @@ -0,0 +1,97 @@ +package machine + +import ( + "context" + "fmt" + + machinev1beta1 "github.com/openshift/api/machine/v1beta1" + "github.com/openshift/configuration-anomaly-detection/pkg/investigations/utils/node" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + MachineNamespace = "openshift-machine-api" + RoleLabelKey = "machine.openshift.io/cluster-api-machine-role" + WorkerRoleLabelValue = "worker" +) + +// HealthcheckRemediationAllowed searches the status conditions for the machinehealthcheck object and determines if remediation is allowed +func HealthcheckRemediationAllowed(healthcheck machinev1beta1.MachineHealthCheck) bool { + for _, condition := range healthcheck.Status.Conditions { + if condition.Type == machinev1beta1.RemediationAllowedCondition && condition.Status == corev1.ConditionTrue { + // Only rule out that the mhc is failing if we can both find the condition and determine its current status + return true + } + } + return false +} + +// GetMachinesForMHC retrieves the machines managed by the given MachineHealthCheck object +func GetMachinesForMHC(ctx context.Context, kclient client.Client, healthcheck machinev1beta1.MachineHealthCheck) ([]machinev1beta1.Machine, error) { + machines := machinev1beta1.MachineList{} + selector, err := metav1.LabelSelectorAsSelector(&healthcheck.Spec.Selector) + if err != nil { + return []machinev1beta1.Machine{}, fmt.Errorf("failed to convert machinehealthcheck %q .spec.selector: %w", healthcheck.Name, err) + } + err = kclient.List(ctx, &machines, client.MatchingLabelsSelector{Selector: selector}, &client.ListOptions{Namespace: MachineNamespace}) + if err != nil { + return 
[]machinev1beta1.Machine{}, fmt.Errorf("failed to retrieve machines from machinehealthcheck %q: %w", healthcheck.Name, err) + } + return machines.Items, nil +} + +// GetRole returns the role of the given machine, if present. If not found, an error is returned +func GetRole(machine machinev1beta1.Machine) (string, error) { + role, found := machine.Labels[RoleLabelKey] + if !found { + return "", fmt.Errorf("expected label key %q not found", RoleLabelKey) + } + return role, nil +} + +// GetNodesForMachines retrieves the nodes for the given machines. Errors encountered are joined, but do not block the retrieval of other machines +func GetNodesForMachines(ctx context.Context, kclient client.Client, machines []machinev1beta1.Machine) ([]corev1.Node, error) { + // Retrieving all nodes initially & filtering out irrelevant objects results in fewer API calls + nodes, err := node.GetAll(ctx, kclient) + if err != nil { + return []corev1.Node{}, fmt.Errorf("failed to retrieve nodes: %w", err) + } + + matches := []corev1.Node{} + for _, machine := range machines { + node, found := findMatchingNode(machine, nodes) + if found { + matches = append(matches, node) + } + } + return matches, nil +} + +// findMatchingNode retrieves the node owned by the provided machine, if one exists, along with a boolean indicating whether +// the search succeeded +func findMatchingNode(machine machinev1beta1.Machine, nodes []corev1.Node) (corev1.Node, bool) { + if machine.Status.NodeRef == nil || machine.Status.NodeRef.Name == "" { + return corev1.Node{}, false + } + for _, node := range nodes { + if machine.Status.NodeRef.Name == node.Name { + return node, true + } + } + + return corev1.Node{}, false +} + +// GetNodeForMachine retrieves the node for the given machine. 
If the provided machine's .Status.NodeRef is empty, +// an error is returned +func GetNodeForMachine(ctx context.Context, kclient client.Client, machine machinev1beta1.Machine) (corev1.Node, error) { + if machine.Status.NodeRef == nil || machine.Status.NodeRef.Name == "" { + return corev1.Node{}, fmt.Errorf("no .Status.NodeRef defined for machine %q", machine.Name) + } + node := &corev1.Node{} + err := kclient.Get(ctx, types.NamespacedName{Name: machine.Status.NodeRef.Name}, node) + return *node, err +} diff --git a/pkg/investigations/utils/machine/machine_test.go b/pkg/investigations/utils/machine/machine_test.go new file mode 100644 index 00000000..8011c21e --- /dev/null +++ b/pkg/investigations/utils/machine/machine_test.go @@ -0,0 +1,339 @@ +package machine + +import ( + "context" + "testing" + + machinev1beta1 "github.com/openshift/api/machine/v1beta1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func Test_HealthcheckRemediationAllowed(t *testing.T) { + // Test objects + failingMhc := newFailingMachineHealthCheck() + + // modify the status conditions on this mhc to indicate it's actually working + workingMhc := newFailingMachineHealthCheck() + workingMhc.Status.Conditions = []machinev1beta1.Condition{ + { + Type: machinev1beta1.RemediationAllowedCondition, + Status: corev1.ConditionTrue, + }, + } + + missingConditionMhc := newFailingMachineHealthCheck() + missingConditionMhc.Status.Conditions = []machinev1beta1.Condition{} + + // Test cases + tests := []struct { + name string + mhc machinev1beta1.MachineHealthCheck + want bool + }{ + { + name: "failing condition present", + mhc: *failingMhc, + want: false, + }, + { + name: "no condition present", + mhc: *missingConditionMhc, + want: false, + }, + { + name: "working condition present", + mhc: *workingMhc, + want: true, + }, + } + for _, tt 
:= range tests { + t.Run(tt.name, func(t *testing.T) { + if got := HealthcheckRemediationAllowed(tt.mhc); got != tt.want { + t.Errorf("Investigation.healthcheckRemediationAllowed() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_GetMachineRole(t *testing.T) { + type result struct { + err bool + role string + } + + tests := []struct { + name string + machine func() *machinev1beta1.Machine + want result + }{ + { + name: "error when no role label", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("role-less") + machine.Labels = map[string]string{} + return machine + }, + want: result{ + err: true, + }, + }, + { + name: "role returned when present", + machine: func() *machinev1beta1.Machine { + machine := newWorkerMachine("worker") + machine.Labels = map[string]string{RoleLabelKey: WorkerRoleLabelValue} + return machine + }, + want: result{ + err: false, + role: WorkerRoleLabelValue, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + machine := tt.machine() + got, err := GetRole(*machine) + + if (err != nil) != tt.want.err { + t.Errorf("unexpected test result: wanted err = %t, returned err = %v", tt.want.err, err) + } + if got != tt.want.role { + t.Errorf("unexpected test result: wanted role = %q, got role = %q", tt.want.role, got) + } + }) + } +} + +func Test_findMatchingNode(t *testing.T) { + type result struct { + name string + found bool + } + + tests := []struct { + name string + objects func() (*machinev1beta1.Machine, []corev1.Node) + want result + }{ + { + name: "false when machine missing noderef", + objects: func() (*machinev1beta1.Machine, []corev1.Node) { + machine := newWorkerMachine("no-node") + machine.Status = machinev1beta1.MachineStatus{} + return machine, []corev1.Node{} + }, + want: result{ + found: false, + }, + }, + { + name: "false when machine nodeRef has no name", + objects: func() (*machinev1beta1.Machine, []corev1.Node) { + machine := newWorkerMachine("no-node") + 
machine.Status.NodeRef = &corev1.ObjectReference{ + Name: "", + } + return machine, []corev1.Node{} + }, + want: result{ + found: false, + }, + }, + { + name: "false when no matching node exists", + objects: func() (*machinev1beta1.Machine, []corev1.Node) { + machine := newWorkerMachine("lonely") + machine.Status.NodeRef = &corev1.ObjectReference{ + Name: "imaginary", + } + + nodes := []corev1.Node{*newWorkerNode("node1"), *newWorkerNode("node2")} + return machine, nodes + }, + want: result{ + found: false, + }, + }, + { + name: "found when node exists", + objects: func() (*machinev1beta1.Machine, []corev1.Node) { + machine := newWorkerMachine("machine1") + machine.Status.NodeRef = &corev1.ObjectReference{ + Name: "node1", + } + + nodes := []corev1.Node{*newWorkerNode("node1"), *newWorkerNode("node2")} + return machine, nodes + }, + want: result{ + found: true, + name: "node1", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Setup + machine, nodes := tt.objects() + objects := []client.Object{machine} + for _, node := range nodes { + objects = append(objects, &node) + } + + // Execute test + got, found := findMatchingNode(*machine, nodes) + + // Validate results + if found != tt.want.found { + t.Errorf("unexpected test result: expected found = %t, got = %t", tt.want.found, found) + } + if got.Name != tt.want.name { + t.Errorf("unexpected test result: expected node named = %#v, got = %#v", tt.want.name, got) + } + }) + } +} + +func Test_GetNodeForMachine(t *testing.T) { + type result struct { + err bool + name string + } + tests := []struct { + name string + objects func() (*machinev1beta1.Machine, []*corev1.Node) + want result + }{ + { + name: "error when machine missing nodeRef", + objects: func() (*machinev1beta1.Machine, []*corev1.Node) { + machine := newWorkerMachine("missing-noderef") + machine.Status = machinev1beta1.MachineStatus{} + return machine, []*corev1.Node{} + }, + want: result{ + err: true, + }, + }, + { + 
name: "error when node does not exist", + objects: func() (*machinev1beta1.Machine, []*corev1.Node) { + machine := newWorkerMachine("invalid-noderef") + machine.Status.NodeRef = &corev1.ObjectReference{ + Name: "missing-node", + } + + node1 := newWorkerNode("node1") + node2 := newWorkerNode("node2") + return machine, []*corev1.Node{node1, node2} + }, + want: result{ + err: true, + }, + }, + { + name: "return node when exists", + objects: func() (*machinev1beta1.Machine, []*corev1.Node) { + machine := newWorkerMachine("valid") + node1 := newWorkerNode("node1") + node2 := newWorkerNode("node2") + machine.Status.NodeRef = &corev1.ObjectReference{ + Name: node1.Name, + } + return machine, []*corev1.Node{node1, node2} + }, + want: result{ + err: false, + name: "node1", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Setup test + machine, nodes := tt.objects() + objs := []client.Object{machine} + for _, node := range nodes { + objs = append(objs, node) + } + + kclient, err := newFakeClient(objs...) 
+ if err != nil { + t.Errorf("failed to create fake client for testing: %v", err) + } + + // Execute + got, err := GetNodeForMachine(context.TODO(), kclient, *machine) + + // Validate results + if (err != nil) != tt.want.err { + t.Errorf("unexpected result in Investigation.getNodeForMachine(): wanted error = %t, returned err = %v", tt.want.err, err) + } + if got.Name != tt.want.name { + t.Errorf("incorrect node returned: expected %q, got %q", tt.want.name, got.Name) + } + }) + } +} + +func newFailingMachineHealthCheck() *machinev1beta1.MachineHealthCheck { + mhc := &machinev1beta1.MachineHealthCheck{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-mhc", + Namespace: MachineNamespace, + }, + Spec: machinev1beta1.MachineHealthCheckSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{"mhc-name": "test-mhc"}, + }, + }, + Status: machinev1beta1.MachineHealthCheckStatus{ + Conditions: []machinev1beta1.Condition{ + { + Type: machinev1beta1.RemediationAllowedCondition, + Status: corev1.ConditionFalse, + }, + }, + }, + } + return mhc +} + +func newWorkerMachine(name string) *machinev1beta1.Machine { + m := &machinev1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: MachineNamespace, + Labels: map[string]string{"mhc-name": "test-mhc", RoleLabelKey: WorkerRoleLabelValue}, + }, + } + return m +} + +func newWorkerNode(name string) *corev1.Node { + n := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }, + } + return n +} + +func newFakeClient(objs ...client.Object) (client.Client, error) { + s := scheme.Scheme + err := machinev1beta1.AddToScheme(s) + if err != nil { + return nil, err + } + + client := fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() + return client, nil +} diff --git a/pkg/investigations/utils/node/node.go b/pkg/investigations/utils/node/node.go new file mode 100644 index 00000000..f38f70f4 --- /dev/null +++ 
b/pkg/investigations/utils/node/node.go @@ -0,0 +1,57 @@ +/* +node defines investigation utility logic related to node objects +*/ +package node + +import ( + "context" + "strings" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + RoleLabelPrefix = "node-role.kubernetes.io" + WorkerRoleSuffix = "worker" +) + +// FindNoScheduleTaint searches the node's taints to find one with effect: NoSchedule, if present. +// +// If none is present, an empty taint and 'false' are returned +func FindNoScheduleTaint(node corev1.Node) (corev1.Taint, bool) { + for _, taint := range node.Spec.Taints { + if taint.Effect == corev1.TaintEffectNoSchedule { + return taint, true + } + } + return corev1.Taint{}, false +} + +// GetAll retrieves all nodes present in the cluster +func GetAll(ctx context.Context, kclient client.Client) ([]corev1.Node, error) { + nodes := corev1.NodeList{} + err := kclient.List(ctx, &nodes) + return nodes.Items, err +} + +// FindReadyCondition searches a node's .Status for the NodeReady condition, and returns it alongside a boolean value which +// indicates whether the condition was found or not +func FindReadyCondition(node corev1.Node) (corev1.NodeCondition, bool) { + for _, condition := range node.Status.Conditions { + if condition.Type == corev1.NodeReady { + return condition, true + } + } + return corev1.NodeCondition{}, false +} + +// GetRole returns the role label of the provided node, along with a boolean indicating whether one was found +func GetRole(node corev1.Node) (string, bool) { + for label := range node.Labels { + if strings.Contains(label, RoleLabelPrefix) { + return label, true + } + } + return "", false +} diff --git a/pkg/investigations/utils/node/node_test.go b/pkg/investigations/utils/node/node_test.go new file mode 100644 index 00000000..7868c90e --- /dev/null +++ b/pkg/investigations/utils/node/node_test.go @@ -0,0 +1,204 @@ +package node + +import ( + "fmt" + "reflect" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func Test_FindNoScheduleTaint(t *testing.T) { + type result struct { + found bool + key string + } + + tests := []struct { + name string + node func() *corev1.Node + want result + }{ + { + name: "false when taint missing", + node: func() *corev1.Node { + node := newWorkerNode("taintless") + node.Spec.Taints = []corev1.Taint{} + return node + }, + want: result{ + found: false, + }, + }, + { + name: "return taint when it exists", + node: func() *corev1.Node { + node := newWorkerNode("tainted") + node.Spec.Taints = []corev1.Taint{ + { + Key: "tainted", + Effect: corev1.TaintEffectNoSchedule, + }, + } + return node + }, + want: result{ + key: "tainted", + found: true, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + node := tt.node() + + got, found := FindNoScheduleTaint(*node) + if found != tt.want.found { + t.Errorf("unexpected test result: expected found = %t, got = %t", tt.want.found, found) + } + if tt.want.found { + if got.Effect != corev1.TaintEffectNoSchedule { + t.Errorf("unexpected test result: expected taint effect = %q, got = %q", corev1.TaintEffectNoSchedule, got.Effect) + } + if got.Key != tt.want.key { + t.Errorf("unexpected test result: expected taint with key = %q, got = %q", tt.want.key, got.Key) + } + } + }) + } +} + +func Test_FindNodeReadyCondition(t *testing.T) { + readyCondition := corev1.NodeCondition{ + Type: corev1.NodeReady, + LastTransitionTime: metav1.Time{ + Time: time.Now(), + }, + } + + type result struct { + found bool + condition corev1.NodeCondition + } + + tests := []struct { + name string + node func() *corev1.Node + want result + }{ + { + name: "return false when missing condition", + node: func() *corev1.Node { + node := newWorkerNode("not-ready") + node.Status.Conditions = []corev1.NodeCondition{} + return node + }, + want: result{ + found: false, + }, + }, + { + name: "return true when Ready", + node: func() *corev1.Node { + node := 
newWorkerNode("ready") + node.Status.Conditions = []corev1.NodeCondition{readyCondition} + return node + }, + want: result{ + found: true, + condition: readyCondition, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + node := tt.node() + got, found := FindReadyCondition(*node) + + if found != tt.want.found { + t.Errorf("unexpected test result: expected found = %t, got = %t", tt.want.found, found) + } + if !reflect.DeepEqual(got, tt.want.condition) { + t.Errorf("unexpected test result: expected condition = %#v, got = %#v", tt.want.condition, got) + } + }) + } +} + +func Test_GetNodeRole(t *testing.T) { + var ( + workerLabel = fmt.Sprintf("%s/%s", RoleLabelPrefix, WorkerRoleSuffix) + infraLabel = fmt.Sprintf("%s/infra", RoleLabelPrefix) + ) + + type result struct { + label string + found bool + } + tests := []struct { + name string + node func() *corev1.Node + want result + }{ + { + name: "not found when no role label", + node: func() *corev1.Node { + node := newWorkerNode("labelless") + node.Labels = map[string]string{} + return node + }, + want: result{ + found: false, + }, + }, + { + name: "label returned when found on worker", + node: func() *corev1.Node { + node := newWorkerNode("labelled") + node.Labels = map[string]string{workerLabel: ""} + return node + }, + want: result{ + found: true, + label: workerLabel, + }, + }, + { + name: "label returned when found on non-worker", + node: func() *corev1.Node { + node := newWorkerNode("labelled") + node.Labels = map[string]string{infraLabel: ""} + return node + }, + want: result{ + found: true, + label: infraLabel, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + node := tt.node() + got, found := GetRole(*node) + if found != tt.want.found { + t.Errorf("unexpected test result: expected found = %t, got = %t", tt.want.found, found) + } + if got != tt.want.label { + t.Errorf("unexpected test result: expected label = %q, got = %q", tt.want.label, got) + } + 
}) + } +} + +func newWorkerNode(name string) *corev1.Node { + n := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{fmt.Sprintf("%s/%s", RoleLabelPrefix, WorkerRoleSuffix): ""}, + }, + } + return n +} diff --git a/pkg/k8s/scheme.go b/pkg/k8s/scheme.go index 6c3ef57f..4263d409 100644 --- a/pkg/k8s/scheme.go +++ b/pkg/k8s/scheme.go @@ -4,6 +4,7 @@ import ( "fmt" configv1 "github.com/openshift/api/config/v1" + machinev1beta1 "github.com/openshift/api/machine/v1beta1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -20,5 +21,9 @@ func initScheme() (*runtime.Scheme, error) { return nil, fmt.Errorf("unable to add config.openshift.io/v1 scheme: %w", err) } + if err := machinev1beta1.AddToScheme(scheme); err != nil { + return nil, fmt.Errorf("unable to add machine.openshift.io/v1beta1 scheme: %w", err) + } + return scheme, nil } diff --git a/test/generate_incident.sh b/test/generate_incident.sh index 53da375d..85bab87c 100755 --- a/test/generate_incident.sh +++ b/test/generate_incident.sh @@ -8,6 +8,7 @@ declare -A alert_mapping=( ["ClusterProvisioningDelay"]="ClusterProvisioningDelay -" ["ClusterMonitoringErrorBudgetBurnSRE"]="ClusterMonitoringErrorBudgetBurnSRE Critical (1)" ["InsightsOperatorDown"]="InsightsOperatorDown" + ["MachineHealthCheckUnterminatedShortCircuitSRE"]="MachineHealthCheckUnterminatedShortCircuitSRE CRITICAL (1)" ) # Function to print help message