Skip to content

Commit e1eeb65

Browse files
authored
FEATURE: Add success label to (sidekiq|scheduled)_job_count (#137)
Before this commit, `scheduled_job_count` represents the total number of scheduled jobs executed regardless of its execution status while `sidekiq_job_count` represents the total number of sidekiq job executed successfuly only. In order to allow the error rate for a particular job, we need to be able to differentiate between successes and failures. This commit seeks to do so by adding a `success` label to the `sidekiq_job_count` and `scheduled_job_count` counters.
1 parent e3ef17a commit e1eeb65

File tree

5 files changed

+90
-15
lines changed

5 files changed

+90
-15
lines changed

lib/collector.rb

+6-2
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ def ensure_global_metrics
195195

196196
def process_job(metric)
197197
ensure_job_metrics
198-
hash = { job_name: metric.job_name }
198+
hash = { job_name: metric.job_name, success: metric.success }
199+
199200
if metric.scheduled
200201
@scheduled_job_duration_seconds.observe(metric.duration, hash)
201202
@scheduled_job_count.observe(metric.count, hash)
@@ -209,10 +210,13 @@ def ensure_job_metrics
209210
unless @scheduled_job_count
210211
@scheduled_job_duration_seconds =
211212
Counter.new("scheduled_job_duration_seconds", "Total time spent in scheduled jobs")
213+
212214
@scheduled_job_count =
213-
Counter.new("scheduled_job_count", "Total number of scheduled jobs executued")
215+
Counter.new("scheduled_job_count", "Total number of scheduled jobs executed")
216+
214217
@sidekiq_job_duration_seconds =
215218
Counter.new("sidekiq_job_duration_seconds", "Total time spent in sidekiq jobs")
219+
216220
@sidekiq_job_count =
217221
Counter.new("sidekiq_job_count", "Total number of sidekiq jobs executed")
218222
end

lib/internal_metric/job.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22

33
module DiscoursePrometheus::InternalMetric
44
class Job < Base
5-
attribute :job_name, :scheduled, :duration, :count
5+
attribute :job_name, :scheduled, :duration, :count, :success
66
end
77
end

plugin.rb

+13-1
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,27 @@ module ::DiscoursePrometheus
6565
metric.job_name = stat.name
6666
metric.duration = stat.duration_ms * 0.001
6767
metric.count = 1
68+
metric.success = stats.success
6869
$prometheus_client.send_json metric.to_h unless Rails.env.test?
6970
end
7071

71-
on(:sidekiq_job_ran) do |worker, msg, queue, duration|
72+
on(:sidekiq_job_ran) do |worker, _msg, _queue, duration|
7273
metric = DiscoursePrometheus::InternalMetric::Job.new
7374
metric.scheduled = false
7475
metric.duration = duration
7576
metric.count = 1
7677
metric.job_name = worker.class.to_s
78+
metric.success = true
79+
$prometheus_client.send_json metric.to_h unless Rails.env.test?
80+
end
81+
82+
on(:sidekiq_job_error) do |worker, _msg, _queue, duration|
83+
metric = DiscoursePrometheus::InternalMetric::Job.new
84+
metric.scheduled = false
85+
metric.duration = duration
86+
metric.count = 1
87+
metric.job_name = worker.class.to_s
88+
metric.success = false
7789
$prometheus_client.send_json metric.to_h unless Rails.env.test?
7890
end
7991
end

spec/lib/collector_spec.rb

+68-11
Original file line numberDiff line numberDiff line change
@@ -46,22 +46,78 @@
4646
expect(counter.data).to eq(nil => 3)
4747
end
4848

49-
it "handles scheduled job metrics" do
50-
metric = DiscoursePrometheus::InternalMetric::Job.new
49+
it "handles sidekiq job metrics" do
50+
metric_1 = DiscoursePrometheus::InternalMetric::Job.new
51+
metric_1.scheduled = false
52+
metric_1.job_name = "Bob"
53+
metric_1.duration = 1.778
54+
metric_1.count = 1
55+
metric_1.success = true
56+
57+
collector.process(metric_1.to_json)
58+
metrics = collector.prometheus_metrics
5159

52-
metric.scheduled = true
53-
metric.job_name = "Bob"
54-
metric.duration = 1.778
55-
metric.count = 1
60+
metric_2 = DiscoursePrometheus::InternalMetric::Job.new
61+
metric_2.scheduled = false
62+
metric_2.job_name = "Bob"
63+
metric_2.duration = 0.5
64+
metric_2.count = 1
65+
metric_2.success = false
66+
collector.process(metric_2.to_json)
67+
68+
metric_3 = DiscoursePrometheus::InternalMetric::Job.new
69+
metric_3.scheduled = false
70+
metric_3.job_name = "Bob"
71+
metric_3.duration = 1.5
72+
metric_3.count = 1
73+
metric_3.success = false
74+
collector.process(metric_3.to_json)
75+
76+
duration = metrics.find { |m| m.name == "sidekiq_job_duration_seconds" }
77+
sidekiq_job_count = metrics.find { |m| m.name == "sidekiq_job_count" }
78+
79+
expect(duration.data).to eq(
80+
{ job_name: "Bob", success: true } => metric_1.duration,
81+
{ job_name: "Bob", success: false } => metric_2.duration + metric_3.duration,
82+
)
83+
84+
expect(sidekiq_job_count.data).to eq(
85+
{ job_name: "Bob", success: false } => 2,
86+
{ job_name: "Bob", success: true } => 1,
87+
)
88+
end
89+
90+
it "handles scheduled job metrics" do
91+
metric_1 = DiscoursePrometheus::InternalMetric::Job.new
92+
metric_1.scheduled = true
93+
metric_1.job_name = "Bob"
94+
metric_1.duration = 1.778
95+
metric_1.success = true
96+
metric_1.count = 1
97+
collector.process(metric_1.to_json)
98+
99+
metric_2 = DiscoursePrometheus::InternalMetric::Job.new
100+
metric_2.scheduled = true
101+
metric_2.job_name = "Bob"
102+
metric_2.duration = 1.123123
103+
metric_2.success = false
104+
metric_2.count = 1
105+
collector.process(metric_2.to_json)
56106

57-
collector.process(metric.to_json)
58107
metrics = collector.prometheus_metrics
59108

60109
duration = metrics.find { |m| m.name == "scheduled_job_duration_seconds" }
61110
count = metrics.find { |m| m.name == "scheduled_job_count" }
62111

63-
expect(duration.data).to eq({ job_name: "Bob" } => 1.778)
64-
expect(count.data).to eq({ job_name: "Bob" } => 1)
112+
expect(duration.data).to eq(
113+
{ job_name: "Bob", success: true } => metric_1.duration,
114+
{ job_name: "Bob", success: false } => metric_2.duration,
115+
)
116+
117+
expect(count.data).to eq(
118+
{ job_name: "Bob", success: true } => 1,
119+
{ job_name: "Bob", success: false } => 1,
120+
)
65121
end
66122

67123
it "handles job initialization metrics" do
@@ -71,15 +127,16 @@
71127
metric.job_name = "Bob"
72128
metric.count = 0
73129
metric.duration = 0
130+
metric.success = true
74131

75132
collector.process(metric.to_json)
76133
metrics = collector.prometheus_metrics
77134

78135
duration = metrics.find { |m| m.name == "scheduled_job_duration_seconds" }
79136
count = metrics.find { |m| m.name == "scheduled_job_count" }
80137

81-
expect(duration.data).to eq({ job_name: "Bob" } => 0)
82-
expect(count.data).to eq({ job_name: "Bob" } => 0)
138+
expect(duration.data).to eq({ job_name: "Bob", success: true } => 0)
139+
expect(count.data).to eq({ job_name: "Bob", success: true } => 0)
83140
end
84141

85142
it "handles process metrics" do

spec/lib/internal_metric/base_spec.rb

+2
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
job.scheduled = true
88
job.duration = 100.1
99
job.count = 1
10+
job.success = false
1011

1112
expect(job.to_h).to eq(
1213
job_name: "bob",
1314
scheduled: true,
1415
duration: 100.1,
1516
count: 1,
1617
_type: "Job",
18+
success: false,
1719
)
1820
end
1921

0 commit comments

Comments
 (0)