Skip to content

Commit dc2efdf

Browse files
tomsun28 and zqr10159 authored
[improve] support bind metrics label and others into alert (apache#3146)
Signed-off-by: tomsun28 <[email protected]> Co-authored-by: Logic <[email protected]>
1 parent 4defb73 commit dc2efdf

File tree

16 files changed

+268
-106
lines changed

16 files changed

+268
-106
lines changed

hertzbeat-alerter/src/main/java/org/apache/hertzbeat/alert/calculate/RealTimeAlertCalculator.java

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ public class RealTimeAlertCalculator {
5555
private static final int CALCULATE_THREADS = 3;
5656

5757
private static final String KEY_INSTANCE = "__instance__";
58+
private static final String KEY_INSTANCE_NAME = "__instancename__";
59+
private static final String KEY_INSTANCE_HOST = "__instancehost__";
5860
private static final String KEY_APP = "__app__";
5961
private static final String KEY_METRICS = "__metrics__";
6062
private static final String KEY_PRIORITY = "__priority__";
@@ -125,10 +127,14 @@ private void startCalculate() {
125127
private void calculate(CollectRep.MetricsData metricsData) {
126128
long currentTimeMilli = System.currentTimeMillis();
127129
String instance = String.valueOf(metricsData.getId());
130+
String instanceName = metricsData.getInstanceName();
131+
String instanceHost = metricsData.getInstanceHost();
128132
String app = metricsData.getApp();
129133
String metrics = metricsData.getMetrics();
130134
int priority = metricsData.getPriority();
131135
int code = metricsData.getCode().getNumber();
136+
Map<String, String> labels = metricsData.getLabels();
137+
Map<String, String> annotations = metricsData.getAnnotations();
132138
List<AlertDefine> thresholds = this.alertDefineService.getRealTimeAlertDefines();
133139
// Filter thresholds by app, metrics and instance
134140
thresholds = filterThresholdsByAppAndMetrics(thresholds, app, metrics, instance, priority);
@@ -137,13 +143,18 @@ private void calculate(CollectRep.MetricsData metricsData) {
137143
}
138144
Map<String, Object> commonContext = new HashMap<>(8);
139145
commonContext.put(KEY_INSTANCE, instance);
146+
commonContext.put(KEY_INSTANCE_NAME, instanceName);
147+
commonContext.put(KEY_INSTANCE_HOST, instanceHost);
140148
commonContext.put(KEY_APP, app);
141149
commonContext.put(KEY_PRIORITY, priority);
142150
commonContext.put(KEY_CODE, code);
143151
commonContext.put(KEY_METRICS, metrics);
144152
if (priority == 0) {
145153
commonContext.put(KEY_AVAILABLE, metricsData.getCode() == CollectRep.Code.SUCCESS ? UP : DOWN);
146154
}
155+
if (labels != null) {
156+
commonContext.putAll(labels);
157+
}
147158
List<CollectRep.Field> fields = metricsData.getFields();
148159
Map<String, Object> fieldValueMap = new HashMap<>(8);
149160
int valueRowCount = metricsData.getValuesCount();
@@ -160,21 +171,26 @@ private void calculate(CollectRep.MetricsData metricsData) {
160171
if (StringUtils.isBlank(expr)) {
161172
continue;
162173
}
174+
Map<String, String> commonFingerPrints = new HashMap<>(8);
175+
commonFingerPrints.put(CommonConstants.LABEL_INSTANCE, instance);
176+
// the alert name is used as part of the fingerprint; we do not account for the alert name being changed later
177+
commonFingerPrints.put(CommonConstants.LABEL_ALERT_NAME, define.getName());
178+
commonFingerPrints.put(CommonConstants.LABEL_INSTANCE_NAME, instanceName);
179+
commonFingerPrints.put(CommonConstants.LABEL_INSTANCE_HOST, instanceHost);
180+
commonFingerPrints.putAll(define.getLabels());
181+
if (labels != null) {
182+
commonFingerPrints.putAll(labels);
183+
}
163184
{
164185
// evaluate the expr before processing the metrics data rows, because of availability (up/down) or other such conditions
165186
try {
166187
boolean match = execAlertExpression(fieldValueMap, expr, true);
167188
try {
168-
Map<String, String> fingerPrints = new HashMap<>(8);
169-
fingerPrints.put(CommonConstants.LABEL_INSTANCE, instance);
170-
// here use the alert name as finger, not care the alert name may be changed
171-
fingerPrints.put(CommonConstants.LABEL_ALERT_NAME, define.getName());
172-
fingerPrints.putAll(define.getLabels());
173189
if (match) {
174190
// If the threshold rule matches, the number of times the threshold has been triggered is determined and an alarm is triggered
175-
afterThresholdRuleMatch(currentTimeMilli, fingerPrints, fieldValueMap, define);
191+
afterThresholdRuleMatch(currentTimeMilli, commonFingerPrints, fieldValueMap, define, annotations);
176192
} else {
177-
handleRecoveredAlert(fingerPrints);
193+
handleRecoveredAlert(commonFingerPrints);
178194
}
179195
// if this threshold's pre-compile evaluation succeeds, skip the logic below
180196
continue;
@@ -192,9 +208,7 @@ private void calculate(CollectRep.MetricsData metricsData) {
192208
fieldValueMap.put(KEY_ROW, valueRowCount);
193209
fieldValueMap.putAll(commonContext);
194210
fingerPrints.clear();
195-
fingerPrints.put(CommonConstants.LABEL_INSTANCE, instance);
196-
fingerPrints.put(CommonConstants.LABEL_ALERT_NAME, define.getName());
197-
fingerPrints.putAll(define.getLabels());
211+
fingerPrints.putAll(commonFingerPrints);
198212
for (int index = 0; index < valueRow.getColumnsList().size(); index++) {
199213
String valueStr = valueRow.getColumns(index);
200214
if (CommonConstants.NULL_VALUE.equals(valueStr)) {
@@ -228,7 +242,7 @@ private void calculate(CollectRep.MetricsData metricsData) {
228242
boolean match = execAlertExpression(fieldValueMap, expr, false);
229243
try {
230244
if (match) {
231-
afterThresholdRuleMatch(currentTimeMilli, fingerPrints, fieldValueMap, define);
245+
afterThresholdRuleMatch(currentTimeMilli, fingerPrints, fieldValueMap, define, annotations);
232246
} else {
233247
handleRecoveredAlert(fingerPrints);
234248
}
@@ -311,20 +325,31 @@ private void handleRecoveredAlert(Map<String, String> fingerprints) {
311325
pendingAlertMap.remove(fingerprint);
312326
}
313327

314-
private void afterThresholdRuleMatch(long currentTimeMilli, Map<String, String> fingerPrints,
315-
Map<String, Object> fieldValueMap, AlertDefine define) {
328+
private void afterThresholdRuleMatch(long currentTimeMilli, Map<String, String> fingerPrints,
329+
Map<String, Object> fieldValueMap, AlertDefine define, Map<String, String> annotations) {
316330
String fingerprint = calculateFingerprint(fingerPrints);
317331
SingleAlert existingAlert = pendingAlertMap.get(fingerprint);
318-
Map<String, String> labels = new HashMap<>(8);
319332
fieldValueMap.putAll(define.getLabels());
320-
labels.putAll(fingerPrints);
321333
int requiredTimes = define.getTimes() == null ? 1 : define.getTimes();
322334
if (existingAlert == null) {
323335
// First time triggering alert, create new alert and set to pending status
336+
Map<String, String> alertLabels = new HashMap<>(8);
337+
alertLabels.putAll(fingerPrints);
338+
Map<String, String> alertAnnotations = new HashMap<>(8);
339+
if (annotations != null) {
340+
alertAnnotations.putAll(annotations);
341+
}
342+
if (define.getAnnotations() != null) {
343+
alertAnnotations.putAll(define.getAnnotations());
344+
}
345+
// render var content in annotations
346+
for (Map.Entry<String, String> entry : alertAnnotations.entrySet()) {
347+
entry.setValue(AlertTemplateUtil.render(entry.getValue(), fieldValueMap));
348+
}
324349
SingleAlert newAlert = SingleAlert.builder()
325-
.labels(labels)
326-
// todo render var content in annotations
327-
.annotations(define.getAnnotations())
350+
.labels(alertLabels)
351+
.annotations(alertAnnotations)
352+
// render var content in content
328353
.content(AlertTemplateUtil.render(define.getTemplate(), fieldValueMap))
329354
.status(CommonConstants.ALERT_STATUS_PENDING)
330355
.triggerTimes(1)

hertzbeat-alerter/src/main/java/org/apache/hertzbeat/alert/service/impl/NoticeConfigServiceImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ public boolean sendTestMsg(NoticeReceiver noticeReceiver) {
305305
Map<String, String> labels = new HashMap<>(8);
306306
labels.put(CommonConstants.LABEL_INSTANCE, "1000000");
307307
labels.put(CommonConstants.LABEL_ALERT_NAME, "CPU Usage Alert");
308-
labels.put(CommonConstants.LABEL_HOST, "127.0.0.1");
308+
labels.put(CommonConstants.LABEL_INSTANCE_HOST, "127.0.0.1");
309309
Map<String, String> annotations = new HashMap<>(8);
310310
annotations.put("suggest", "Please check the CPU usage of the server");
311311
SingleAlert singleAlert1 = SingleAlert.builder()

hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/prometheus/PrometheusAutoCollectImpl.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,6 @@ private List<CollectRep.MetricsData> parseResponseByPrometheusExporter(String re
173173
Map<String, MetricFamily> metricFamilyMap = TextParser.textToMetricFamilies(resp);
174174
List<CollectRep.MetricsData> metricsDataList = new LinkedList<>();
175175
for (Map.Entry<String, MetricFamily> entry : metricFamilyMap.entrySet()) {
176-
builder.clearMetrics();
177176
builder.clearFields();
178177
builder.clearValues();
179178
String metricsName = entry.getKey();

hertzbeat-collector/hertzbeat-collector-collector/src/main/java/org/apache/hertzbeat/collector/dispatch/MetricsCollect.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,18 @@ public class MetricsCollect implements Runnable, Comparable<MetricsCollect> {
7575
* metrics configuration
7676
*/
7777
protected Metrics metrics;
78+
/**
79+
* metadata
80+
*/
81+
protected Map<String, String> metadata;
82+
/**
83+
* labels
84+
*/
85+
protected Map<String, String> labels;
86+
/**
87+
* annotations
88+
*/
89+
protected Map<String, String> annotations;
7890
/**
7991
* time wheel timeout
8092
*/
@@ -119,6 +131,9 @@ public MetricsCollect(Metrics metrics, Timeout timeout,
119131
this.id = job.getMonitorId();
120132
this.tenantId = job.getTenantId();
121133
this.app = job.getApp();
134+
this.metadata = job.getMetadata();
135+
this.labels = job.getLabels();
136+
this.annotations = job.getAnnotations();
122137
this.collectDataDispatch = collectDataDispatch;
123138
this.isCyclic = job.isCyclic();
124139
this.isSd = job.isSd();
@@ -136,9 +151,8 @@ public void run() {
136151
this.startTime = System.currentTimeMillis();
137152
setNewThreadName(id, app, startTime, metrics);
138153
CollectRep.MetricsData.Builder response = CollectRep.MetricsData.newBuilder();
139-
response.setApp(app);
140-
response.setId(id);
141-
response.setTenantId(tenantId);
154+
response.setApp(app).setId(id).setTenantId(tenantId)
155+
.setLabels(labels).setAnnotations(annotations).addMetadataAll(metadata);
142156
// for prometheus auto
143157
if (DispatchConstants.PROTOCOL_PROMETHEUS.equalsIgnoreCase(metrics.getProtocol())) {
144158
List<CollectRep.MetricsData> metricsData = PrometheusAutoCollectImpl

hertzbeat-common/src/main/java/org/apache/hertzbeat/common/constants/CommonConstants.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,14 @@ public interface CommonConstants {
9393
String LABEL_ALERT_NAME = "alertname";
9494

9595
/**
96-
* label key: host
96+
* label key: instance host
9797
*/
98-
String LABEL_HOST = "host";
98+
String LABEL_INSTANCE_HOST = "instancehost";
99+
100+
/**
101+
* label key: instance name
102+
*/
103+
String LABEL_INSTANCE_NAME = "instancename";
99104

100105
/**
101106
* Alarm severity label key

hertzbeat-common/src/main/java/org/apache/hertzbeat/common/constants/MetricDataConstants.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,16 @@ public interface MetricDataConstants {
2727
String UNIT = "unit";
2828

2929
// key in Schema metadata
30-
String MONITOR_ID = "id";
30+
String ID = "id";
3131
String TENANT_ID = "tenantId";
3232
String APP = "app";
3333
String METRICS = "metrics";
34+
String LABELS = "labels";
35+
String ANNOTATIONS = "annotations";
3436
String PRIORITY = "priority";
3537
String TIME = "time";
3638
String CODE = "code";
3739
String MSG = "msg";
40+
String INSTANCE_NAME = "instancename";
41+
String INSTANCE_HOST = "instancehost";
3842
}

hertzbeat-common/src/main/java/org/apache/hertzbeat/common/entity/job/Job.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,19 @@ public class Job {
6161
* Monitoring Task ID
6262
*/
6363
private long monitorId;
64+
/**
65+
* metadata info bound to this job
66+
* e.g. instancename, instancehost
67+
*/
68+
private Map<String, String> metadata;
69+
/**
70+
* labels bound to this job
71+
*/
72+
private Map<String, String> labels;
73+
/**
74+
* annotations bound to this job
75+
*/
76+
private Map<String, String> annotations;
6477
/**
6578
* Is hide this app in main menus layout, only for app type, default true.
6679
*/

0 commit comments

Comments
 (0)