Skip to content

Commit 6fa8460

Browse files
committed
FEATURE: Add sidekiq_job_error_count counter metric
There is currently a `sidekiq_job_count` metric that counts the number of sidekiq jobs that ran successfully. It is also useful to count the number of sidekiq jobs that encountered an error, so that we can calculate the error rate of sidekiq jobs.
1 parent ddc5b80 commit 6fa8460

File tree

4 files changed

+58
-5
lines changed

4 files changed

+58
-5
lines changed

lib/collector.rb

+16-3
Original file line numberDiff line numberDiff line change
@@ -196,25 +196,37 @@ def ensure_global_metrics
196196
def process_job(metric)
197197
ensure_job_metrics
198198
hash = { job_name: metric.job_name }
199+
199200
if metric.scheduled
200201
@scheduled_job_duration_seconds.observe(metric.duration, hash)
201202
@scheduled_job_count.observe(metric.count, hash)
202203
else
203204
@sidekiq_job_duration_seconds.observe(metric.duration, hash)
204-
@sidekiq_job_count.observe(metric.count, hash)
205+
206+
if metric.success
207+
@sidekiq_job_count.observe(metric.count, hash)
208+
else
209+
@sidekiq_job_error_count.observe(metric.count, hash)
210+
end
205211
end
206212
end
207213

208214
def ensure_job_metrics
209215
unless @scheduled_job_count
210216
@scheduled_job_duration_seconds =
211217
Counter.new("scheduled_job_duration_seconds", "Total time spent in scheduled jobs")
218+
212219
@scheduled_job_count =
213-
Counter.new("scheduled_job_count", "Total number of scheduled jobs executued")
220+
Counter.new("scheduled_job_count", "Total number of scheduled jobs that succeeded")
221+
214222
@sidekiq_job_duration_seconds =
215223
Counter.new("sidekiq_job_duration_seconds", "Total time spent in sidekiq jobs")
224+
216225
@sidekiq_job_count =
217-
Counter.new("sidekiq_job_count", "Total number of sidekiq jobs executed")
226+
Counter.new("sidekiq_job_count", "Total number of sidekiq jobs that succeeded")
227+
228+
@sidekiq_job_error_count =
229+
Counter.new("sidekiq_job_error_count", "Total number of sidekiq jobs that failed")
218230
end
219231
end
220232

@@ -403,6 +415,7 @@ def job_metrics
403415
@scheduled_job_count,
404416
@sidekiq_job_duration_seconds,
405417
@sidekiq_job_count,
418+
@sidekiq_job_error_count,
406419
]
407420
else
408421
[]

lib/internal_metric/job.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22

33
module DiscoursePrometheus::InternalMetric
44
class Job < Base
5-
attribute :job_name, :scheduled, :duration, :count
5+
attribute :job_name, :scheduled, :duration, :count, :success
66
end
77
end

plugin.rb

+13-1
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,27 @@ module ::DiscoursePrometheus
6565
metric.job_name = stat.name
6666
metric.duration = stat.duration_ms * 0.001
6767
metric.count = 1
68+
metric.success = true
6869
$prometheus_client.send_json metric.to_h unless Rails.env.test?
6970
end
7071

71-
on(:sidekiq_job_ran) do |worker, msg, queue, duration|
72+
on(:sidekiq_job_ran) do |worker, _msg, _queue, duration|
7273
metric = DiscoursePrometheus::InternalMetric::Job.new
7374
metric.scheduled = false
7475
metric.duration = duration
7576
metric.count = 1
7677
metric.job_name = worker.class.to_s
78+
metric.success = true
79+
$prometheus_client.send_json metric.to_h unless Rails.env.test?
80+
end
81+
82+
on(:sidekiq_job_error) do |worker, _msg, _queue, duration|
83+
metric = DiscoursePrometheus::InternalMetric::Job.new
84+
metric.scheduled = false
85+
metric.duration = duration
86+
metric.count = 1
87+
metric.job_name = worker.class.to_s
88+
metric.success = false
7789
$prometheus_client.send_json metric.to_h unless Rails.env.test?
7890
end
7991
end

spec/lib/collector_spec.rb

+28
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,34 @@
4646
expect(counter.data).to eq(nil => 3)
4747
end
4848

49+
it "handles sidekiq job metrics" do
50+
metric_1 = DiscoursePrometheus::InternalMetric::Job.new
51+
metric_1.scheduled = false
52+
metric_1.job_name = "Bob"
53+
metric_1.duration = 1.778
54+
metric_1.count = 1
55+
metric_1.success = true
56+
57+
collector.process(metric_1.to_json)
58+
metrics = collector.prometheus_metrics
59+
60+
metric_2 = DiscoursePrometheus::InternalMetric::Job.new
61+
metric_2.scheduled = false
62+
metric_2.job_name = "Bob"
63+
metric_2.duration = 0.5
64+
metric_2.count = 1
65+
metric_2.success = false
66+
collector.process(metric_2.to_json)
67+
68+
duration = metrics.find { |m| m.name == "sidekiq_job_duration_seconds" }
69+
sidekiq_job_count = metrics.find { |m| m.name == "sidekiq_job_count" }
70+
sidekiq_job_error_count = metrics.find { |m| m.name == "sidekiq_job_error_count" }
71+
72+
expect(duration.data).to eq({ job_name: "Bob" } => metric_1.duration + metric_2.duration)
73+
expect(sidekiq_job_count.data).to eq({ job_name: "Bob" } => 1)
74+
expect(sidekiq_job_error_count.data).to eq({ job_name: "Bob" } => 1)
75+
end
76+
4977
it "handles scheduled job metrics" do
5078
metric = DiscoursePrometheus::InternalMetric::Job.new
5179

0 commit comments

Comments
 (0)