Skip to content

Commit c75cca0

Browse files
authored
fix: insufficient error handling for cluster down (#385)
1 parent db3a3c8 commit c75cca0

File tree

10 files changed

+123
-16
lines changed

10 files changed

+123
-16
lines changed

bin/pubsub

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ module PubSubDebug
6565
log "#{role}: recv: #{e.class}"
6666
rescue RedisClient::CommandError => e
6767
log "#{role}: recv: #{e.class}: #{e.message}"
68-
raise unless e.message.start_with?('CLUSTERDOWN Hash slot not served')
68+
raise unless e.message.start_with?('CLUSTERDOWN')
6969
rescue StandardError => e
7070
log "#{role}: recv: #{e.class}: #{e.message}"
7171
raise

bin/singlepiptx

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
4+
require 'bundler/setup'
5+
require 'redis_cluster_client'
6+
7+
module SinglePipTxDebug
8+
module_function
9+
10+
def spawn_single(cli)
11+
Thread.new(cli) do |r|
12+
role = ' Single'
13+
14+
loop do
15+
handle_errors(role) do
16+
reply = r.call('incr', 'single')
17+
log "#{role}: #{reply}"
18+
end
19+
ensure
20+
sleep 1.0
21+
end
22+
rescue StandardError => e
23+
log "#{role}: dead: #{e.class}: #{e.message}"
24+
raise
25+
end
26+
end
27+
28+
def spawn_pipeline(cli)
29+
Thread.new(cli) do |r|
30+
role = ' Pipeline'
31+
32+
loop do
33+
handle_errors(role) do
34+
reply = r.pipelined do |pi|
35+
pi.call('incr', 'pipeline')
36+
pi.call('incr', 'pipeline')
37+
end
38+
39+
log "#{role}: #{reply}"
40+
end
41+
ensure
42+
sleep 1.0
43+
end
44+
rescue StandardError => e
45+
log "#{role}: dead: #{e.class}: #{e.message}"
46+
raise
47+
end
48+
end
49+
50+
def spawn_transaction(cli)
51+
Thread.new(cli) do |r|
52+
role = 'Transaction'
53+
54+
loop do
55+
handle_errors(role) do
56+
reply = r.multi do |tx|
57+
tx.call('incr', 'transaction')
58+
tx.call('incr', 'transaction')
59+
tx.call('incr', 'transaction')
60+
end
61+
62+
log "#{role}: #{reply}"
63+
end
64+
ensure
65+
sleep 1.0
66+
end
67+
rescue StandardError => e
68+
log "#{role}: dead: #{e.class}: #{e.message}"
69+
raise
70+
end
71+
end
72+
73+
def handle_errors(role) # rubocop:disable Metrics/AbcSize
74+
yield
75+
rescue RedisClient::ConnectionError, RedisClient::Cluster::InitialSetupError, RedisClient::Cluster::NodeMightBeDown => e
76+
log "#{role}: #{e.class}"
77+
rescue RedisClient::CommandError => e
78+
log "#{role}: #{e.class}: #{e.message}"
79+
raise unless e.message.start_with?('CLUSTERDOWN')
80+
rescue RedisClient::Cluster::ErrorCollection => e
81+
log "#{role}: #{e.class}: #{e.message}"
82+
raise unless e.errors.values.all? do |err|
83+
err.message.start_with?('CLUSTERDOWN') || err.is_a?(::RedisClient::ConnectionError)
84+
end
85+
rescue StandardError => e
86+
log "#{role}: #{e.class}: #{e.message}"
87+
raise
88+
end
89+
90+
def log(msg)
91+
print "#{msg}\n"
92+
end
93+
end
94+
95+
clients = Array.new(3) { RedisClient.cluster(connect_with_original_config: true).new_client }
96+
threads = []
97+
98+
Signal.trap(:INT) do
99+
threads.each(&:exit)
100+
clients.each(&:close)
101+
SinglePipTxDebug.log("\nBye bye")
102+
exit 0
103+
end
104+
105+
threads << SinglePipTxDebug.spawn_single(clients[0])
106+
threads << SinglePipTxDebug.spawn_pipeline(clients[1])
107+
threads << SinglePipTxDebug.spawn_transaction(clients[2])
108+
threads.each(&:join)

lib/redis_client/cluster/errors.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ def initialize(errors)
3737
end
3838

3939
@errors = errors
40-
messages = @errors.map { |node_key, error| "#{node_key}: #{error.message}" }
41-
super("Errors occurred on any node: #{messages.join(', ')}")
40+
messages = @errors.map { |node_key, error| "#{node_key}: (#{error.class}) #{error.message}" }
41+
super(messages.join(', '))
4242
end
4343
end
4444

lib/redis_client/cluster/optimistic_locking.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def watch(keys) # rubocop:disable Metrics/AbcSize
3333
raise
3434
end
3535
rescue ::RedisClient::CommandError => e
36-
@router.renew_cluster_state if e.message.start_with?('CLUSTERDOWN Hash slot not served')
36+
@router.renew_cluster_state if e.message.start_with?('CLUSTERDOWN')
3737
raise
3838
end
3939
rescue ::RedisClient::ConnectionError

lib/redis_client/cluster/pipeline.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def call_pipelined_aware_of_redirection(commands, timeouts, exception:) # ruboco
7373
first_exception ||= result
7474
end
7575

76-
stale_cluster_state = true if result.message.start_with?('CLUSTERDOWN Hash slot not served')
76+
stale_cluster_state = true if result.message.start_with?('CLUSTERDOWN')
7777
end
7878

7979
results[index] = result

lib/redis_client/cluster/pub_sub.rb

+2-3
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,10 @@ def next_event(timeout = nil) # rubocop:disable Metrics/AbcSize, Metrics/Cycloma
9090

9191
case event = @queue.pop(true)
9292
when ::RedisClient::CommandError
93-
raise event unless event.message.start_with?('MOVED', 'CLUSTERDOWN Hash slot not served')
93+
raise event unless event.message.start_with?('MOVED', 'CLUSTERDOWN')
9494

9595
break start_over
96-
when ::RedisClient::ConnectionError
97-
break start_over
96+
when ::RedisClient::ConnectionError then break start_over
9897
when StandardError then raise event
9998
when Array then break event
10099
end

lib/redis_client/cluster/router.rb

+3-3
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,15 @@ def send_command(method, command, *args, &block) # rubocop:disable Metrics/AbcSi
7474
renew_cluster_state
7575
raise
7676
rescue ::RedisClient::CommandError => e
77-
renew_cluster_state if e.message.start_with?('CLUSTERDOWN Hash slot not served')
77+
renew_cluster_state if e.message.start_with?('CLUSTERDOWN')
7878
raise
7979
rescue ::RedisClient::Cluster::ErrorCollection => e
8080
raise if e.errors.any?(::RedisClient::CircuitBreaker::OpenCircuitError)
8181

8282
renew_cluster_state if e.errors.values.any? do |err|
8383
next false if ::RedisClient::Cluster::ErrorIdentification.identifiable?(err) && @node.none? { |c| ::RedisClient::Cluster::ErrorIdentification.client_owns_error?(err, c) }
8484

85-
err.message.start_with?('CLUSTERDOWN Hash slot not served') || err.is_a?(::RedisClient::ConnectionError)
85+
err.message.start_with?('CLUSTERDOWN') || err.is_a?(::RedisClient::ConnectionError)
8686
end
8787

8888
raise
@@ -123,7 +123,7 @@ def handle_redirection(node, retry_count:) # rubocop:disable Metrics/AbcSize, Me
123123
node.call('ASKING')
124124
retry
125125
end
126-
elsif e.message.start_with?('CLUSTERDOWN Hash slot not served')
126+
elsif e.message.start_with?('CLUSTERDOWN')
127127
renew_cluster_state
128128
retry if retry_count >= 0
129129
end

lib/redis_client/cluster/transaction.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def handle_command_error!(err, redirect:) # rubocop:disable Metrics/AbcSize
170170
elsif err.message.start_with?('ASK')
171171
node = @router.assign_asking_node(err.message)
172172
try_asking(node) ? send_transaction(node, redirect: redirect - 1) : err
173-
elsif err.message.start_with?('CLUSTERDOWN Hash slot not served')
173+
elsif err.message.start_with?('CLUSTERDOWN')
174174
@router.renew_cluster_state if @watching_slot.nil?
175175
raise err
176176
else

test/redis_client/cluster/test_errors.rb

+3-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
class RedisClient
66
class Cluster
77
class TestErrors < TestingWrapper
8-
DummyError = Struct.new('DummyError', :message)
8+
DummyError = Class.new(StandardError)
99

1010
def test_initial_setup_error
1111
[
@@ -44,11 +44,11 @@ def test_error_collection_error
4444
[
4545
{
4646
errors: { '127.0.0.1:6379' => DummyError.new('foo') },
47-
want: { msg: 'Errors occurred on any node: 127.0.0.1:6379: foo', size: 1 }
47+
want: { msg: '127.0.0.1:6379: (RedisClient::Cluster::TestErrors::DummyError) foo', size: 1 }
4848
},
4949
{
5050
errors: { '127.0.0.1:6379' => DummyError.new('foo'), '127.0.0.1:6380' => DummyError.new('bar') },
51-
want: { msg: 'Errors occurred on any node: 127.0.0.1:6379: foo, 127.0.0.1:6380: bar', size: 2 }
51+
want: { msg: '127.0.0.1:6379: (RedisClient::Cluster::TestErrors::DummyError) foo, 127.0.0.1:6380: (RedisClient::Cluster::TestErrors::DummyError) bar', size: 2 }
5252
},
5353
{ errors: {}, want: { msg: '{}', size: 0 } },
5454
{ errors: [], want: { msg: '[]', size: 0 } },

test/test_against_cluster_scale.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def retryable(attempts:)
115115
attempts -= 1
116116
break yield
117117
rescue ::RedisClient::CommandError => e
118-
raise unless e.message.start_with?('CLUSTERDOWN Hash slot not served')
118+
raise unless e.message.start_with?('CLUSTERDOWN')
119119

120120
@cluster_down_error_count += 1
121121
sleep WAIT_SEC

0 commit comments

Comments
 (0)