Skip to content

Commit 6be788d

Browse files
committed
modified based on Amar's feedback
1 parent 4923cfa commit 6be788d

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

infrastructure/monitoring/monitoring-dashboard.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ Resources:
3535
Properties:
3636
DashboardName: !Ref DashboardName
3737
DashboardBody:
38-
!Sub '{"variables":[{"type":"property","property":"Application","inputType":"input","id":"Application","label":"Application","visible":true},{"type":"property","property":"Id","inputType":"input","id":"Id","label":"Kinesis Stream Name","visible":true}],"widgets":[{"height":6,"width":6,"y":1,"x":0,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","fullRestarts","Application","${ApplicationName}",{"region":"${MSFRegion}","yAxis":"left","color":"#d62728"}],[".","uptime",".",".",{"region":"${MSFRegion}","yAxis":"right","color":"#2ca02c"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Maximum","title":"Full Restarts and Uptime of Application","yAxis":{"left":{"showUnits":false,"label":"Count"},"right":{"label":"Milliseconds","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":1,"width":6,"y":0,"x":0,"type":"text","properties":{"markdown":"# Job Availability","background":"solid"}},{"height":1,"width":18,"y":0,"x":6,"type":"text","properties":{"markdown":"# Job Performance","background":"solid"}},{"height":6,"width":6,"y":1,"x":6,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","containerCPUUtilization","Application","${ApplicationName}",{"region":"${MSFRegion}","color":"#98df8a"}],[".","containerMemoryUtilization",".",".",{"region":"${MSFRegion}","yAxis":"left","color":"#1f77b4"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Average","title":"CPU and Memory 
(Container)","yAxis":{"left":{"showUnits":false,"label":"%"},"right":{"label":"%","showUnits":false}},"liveData":false,"legend":{"position":"bottom"},"annotations":{"horizontal":[{"label":"Threshold","value":80,"fill":"above"}]}}},{"height":6,"width":6,"y":1,"x":12,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","backPressuredTimeMsPerSecond","Application","${ApplicationName}",{"region":"${MSFRegion}","color":"#9467bd"}],[".","busyTimeMsPerSecond",".",".",{"region":"${MSFRegion}","yAxis":"right","color":"#d62728"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Average","title":"Backpressure and Busy Time","yAxis":{"left":{"showUnits":false,"label":"Milliseconds"},"right":{"label":"Milliseconds","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":6,"width":6,"y":1,"x":18,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","lastCheckpointDuration","Application","${ApplicationName}",{"region":"${MSFRegion}","label":"lastCheckpointDuration","color":"#2ca02c"}],[".","lastCheckpointSize",".",".",{"yAxis":"right","region":"${MSFRegion}","color":"#1f77b4"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Maximum","title":"Last Checkpoint Duration","yAxis":{"left":{"showUnits":false,"label":"Milliseconds"},"right":{"label":"Bytes","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":1,"width":12,"y":7,"x":0,"type":"text","properties":{"markdown":"# Job Progress","background":"solid"}},{"height":8,"width":6,"y":8,"x":6,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","currentInputWatermark","Application","${ApplicationName}",{"region":"${MSFRegion}"}],[".","currentOutputWatermark",".",".",{"region":"${MSFRegion}","yAxis":"right"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Maximum","title":"Watermarks (Event Time 
only)","yAxis":{"left":{"showUnits":false,"label":"Milliseconds"},"right":{"label":"Milliseconds","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":8,"width":6,"y":8,"x":0,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","records_lag_max","Application","${ApplicationName}",{"region":"${MSFRegion}"}],[".","millisBehindLatest","Id","${KinesisStreamName}","Application","${ApplicationName}","Flow","Input",{"region":"${MSFRegion}","yAxis":"right"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Maximum","title":"Record Lag (Kinesis or Kafka)","yAxis":{"left":{"showUnits":false,"label":"Milliseconds"},"right":{"label":"Milliseconds","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":3,"width":12,"y":7,"x":12,"type":"text","properties":{"markdown":"# numRecordsIn / Out\nPlease Note: You must enable task level metrics for these metrics to appear properly\nSources and Sinks may share the same names between Kinesis and Kafka, so feel free to modify this dashboard based on your application specifics.","background":"solid"}},{"height":6,"width":12,"y":10,"x":12,"type":"metric","properties":{"metrics":[[{"expression":"m1 / 4","label":"kafka numRecordsIn (Calculated)","id":"kafka_in","region":"${MSFRegion}"}],[{"expression":"m2 / 4","label":"kafka numRecordsOut (Calculated)","id":"kakfa_out","yAxis":"right","region":"${MSFRegion}"}],[{"expression":"m4 / 4","label":"kinesis numRecordsOut (Calculated)","id":"kinesis_out","yAxis":"right"}],[{"expression":"m3 / 4","label":"kinesis numRecordsIn 
(Calculated)","id":"kinesis_in"}],["AWS/KinesisAnalytics","numRecordsOut","Task","Source:_Kafka_source","Application","${ApplicationName}",{"id":"m1","visible":false,"region":"${MSFRegion}"}],[".","numRecordsIn",".","Sink:_Writer",".",".",{"id":"m2","visible":false,"region":"${MSFRegion}"}],[".","numRecordsOut",".","Source:_Kinesis_Source",".","${ApplicationName}",{"id":"m3","visible":false}],[".","numRecordsIn",".","Sink:_Writer",".",".",{"id":"m4","visible":false}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","stat":"Sum","period":60,"title":"Records In & Out"}}]}'
38+
!Sub '{"variables":[{"type":"property","property":"Application","inputType":"input","id":"Application","label":"Flink Application Name","visible":true},{"type":"property","property":"Id","inputType":"input","id":"Id","label":"Kinesis Stream Name","visible":true}],"widgets":[{"height":6,"width":6,"y":1,"x":0,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","fullRestarts","Application","${ApplicationName}",{"region":"${MSFRegion}","yAxis":"left","color":"#d62728"}],[".","uptime",".",".",{"region":"${MSFRegion}","yAxis":"right","color":"#2ca02c"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Maximum","title":"Full Restarts and Uptime of Application","yAxis":{"left":{"showUnits":false,"label":"Count"},"right":{"label":"Milliseconds","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":1,"width":6,"y":0,"x":0,"type":"text","properties":{"markdown":"# Job Availability","background":"solid"}},{"height":1,"width":18,"y":0,"x":6,"type":"text","properties":{"markdown":"# Job Performance","background":"solid"}},{"height":6,"width":6,"y":1,"x":6,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","containerCPUUtilization","Application","${ApplicationName}",{"region":"${MSFRegion}","color":"#98df8a"}],[".","containerMemoryUtilization",".",".",{"region":"${MSFRegion}","yAxis":"left","color":"#1f77b4"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Average","title":"CPU and Memory 
(Container)","yAxis":{"left":{"showUnits":false,"label":"%"},"right":{"label":"%","showUnits":false}},"liveData":false,"legend":{"position":"bottom"},"annotations":{"horizontal":[{"label":"Threshold","value":80,"fill":"above"}]}}},{"height":6,"width":6,"y":1,"x":12,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","backPressuredTimeMsPerSecond","Application","${ApplicationName}",{"region":"${MSFRegion}","color":"#9467bd"}],[".","busyTimeMsPerSecond",".",".",{"region":"${MSFRegion}","yAxis":"right","color":"#d62728"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Average","title":"Backpressure and Busy Time","yAxis":{"left":{"showUnits":false,"label":"Milliseconds"},"right":{"label":"Milliseconds","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":6,"width":6,"y":1,"x":18,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","lastCheckpointDuration","Application","${ApplicationName}",{"region":"${MSFRegion}","label":"lastCheckpointDuration","color":"#2ca02c"}],[".","lastCheckpointSize",".",".",{"yAxis":"right","region":"${MSFRegion}","color":"#1f77b4"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Maximum","title":"Last Checkpoint Duration","yAxis":{"left":{"showUnits":false,"label":"Milliseconds"},"right":{"label":"Bytes","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":1,"width":12,"y":7,"x":0,"type":"text","properties":{"markdown":"# Job Progress","background":"solid"}},{"height":8,"width":6,"y":8,"x":6,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","currentInputWatermark","Application","${ApplicationName}",{"region":"${MSFRegion}"}],[".","currentOutputWatermark",".",".",{"region":"${MSFRegion}","yAxis":"right"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Maximum","title":"Watermarks (Event Time 
only)","yAxis":{"left":{"showUnits":false,"label":"Milliseconds"},"right":{"label":"Milliseconds","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":8,"width":6,"y":8,"x":0,"type":"metric","properties":{"metrics":[["AWS/KinesisAnalytics","records_lag_max","Application","${ApplicationName}",{"region":"${MSFRegion}"}],[".","millisBehindLatest","Id","${KinesisStreamName}","Application","${ApplicationName}","Flow","Input",{"region":"${MSFRegion}","yAxis":"right"}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","period":60,"stat":"Maximum","title":"Record Lag (Kinesis or Kafka)","yAxis":{"left":{"showUnits":false,"label":"Milliseconds"},"right":{"label":"Milliseconds","showUnits":false}},"liveData":false,"legend":{"position":"bottom"}}},{"height":3,"width":12,"y":7,"x":12,"type":"text","properties":{"markdown":"# numRecordsIn / Out\nPlease Note: You must enable task level metrics for these metrics to appear properly. \nSources and Sinks may share the same names between Kinesis and Kafka, so feel free to modify this dashboard based on your application specifics.","background":"solid"}},{"height":6,"width":12,"y":10,"x":12,"type":"metric","properties":{"metrics":[[{"expression":"m1 / 4","label":"kafka numRecordsIn (Calculated)","id":"kafka_in","region":"${MSFRegion}"}],[{"expression":"m2 / 4","label":"kafka numRecordsOut (Calculated)","id":"kafka_out","yAxis":"right","region":"${MSFRegion}"}],[{"expression":"m4 / 4","label":"kinesis numRecordsOut (Calculated)","id":"kinesis_out","yAxis":"right"}],[{"expression":"m3 / 4","label":"kinesis numRecordsIn 
(Calculated)","id":"kinesis_in"}],["AWS/KinesisAnalytics","numRecordsOut","Task","Source:_Kafka_source","Application","${ApplicationName}",{"id":"m1","visible":false,"region":"${MSFRegion}"}],[".","numRecordsIn",".","Sink:_Writer",".",".",{"id":"m2","visible":false,"region":"${MSFRegion}"}],[".","numRecordsOut",".","Source:_Kinesis_Source",".","${ApplicationName}",{"id":"m3","visible":false}],[".","numRecordsIn",".","Sink:_Writer",".",".",{"id":"m4","visible":false}]],"view":"timeSeries","stacked":false,"region":"${MSFRegion}","stat":"Sum","period":60,"title":"Records In & Out"}}]}'
3939
Outputs:
4040
CloudwatchDashboard:
4141
Description: "Dashboard created to monitor the Managed Service for Apache Flink Application"

0 commit comments

Comments
 (0)