Skip to content

Commit e706ffe

Browse files
committed
Bug#36750146 Ndb : Graceful node shutdown does not avoid event duplicates
Graceful node stop is intended to carefully hand over responsibility for forwarding event streams from the stopping node to the still-running nodes, so that event subscribers are not sent duplicate events, as they can be as a result of an unexpected node failure. However, this was not working correctly, which could lead to issues with event merge and other event consumers even when performing a graceful node stop.

The problem is fixed, and two new tests covering the problem with graceful stop are added:

  test_event -n EventConsumer_Graceful
  test_event -n MergeEventConsumer_Graceful

Change-Id: I2effa2e0908c920e48ac235a5e0f3a1fce10413a
1 parent 33a185c commit e706ffe

File tree

3 files changed

+524
-16
lines changed

3 files changed

+524
-16
lines changed

storage/ndb/src/kernel/blocks/suma/Suma.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2003, 2023, Oracle and/or its affiliates.
2+
Copyright (c) 2003, 2024, Oracle and/or its affiliates.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License, version 2.0,
@@ -5255,6 +5255,11 @@ Suma::execFIRE_TRIG_ORD(Signal* signal)
52555255

52565256
Uint32 bucket= hashValue % c_no_of_buckets;
52575257
m_max_seen_gci = (gci > m_max_seen_gci ? gci : m_max_seen_gci);
5258+
/**
5259+
* Normally a bucket is either in the active or switchover set, or neither.
5260+
* The exception is during add/drop NG, when an active bucket may be in the
5261+
* switchover set for notifying subscribers of change.
5262+
*/
52585263
if(m_active_buckets.get(bucket) ||
52595264
(m_switchover_buckets.get(bucket) && (check_switchover(bucket, gci))))
52605265
{
@@ -6871,9 +6876,10 @@ Suma::execSUMA_HANDOVER_REQ(Signal* signal)
68716876
if(get_responsible_node(i) == nodeId &&
68726877
get_responsible_node(i, nodegroup) == getOwnNodeId())
68736878
{
6874-
// I'm will be running this bucket when nodeId shutdown
6879+
// I will be running this bucket when nodeId shuts down
68756880
jam();
68766881
tmp.set(i);
6882+
ndbassert(!m_active_buckets.get(i));
68776883
m_switchover_buckets.set(i);
68786884
c_buckets[i].m_switchover_gci = (Uint64(start_gci) << 32) - 1;
68796885
c_buckets[i].m_state |= Bucket::BUCKET_SHUTDOWN_TO;
@@ -6962,6 +6968,7 @@ Suma::execSUMA_HANDOVER_CONF(Signal* signal) {
69626968
nodeId, gci, buf, c_no_of_buckets);
69636969
g_eventLogger->info("Suma: handover from node %u gci: %u buckets: %s (%u)",
69646970
nodeId, gci, buf, c_no_of_buckets);
6971+
ndbassert(!m_active_buckets.overlaps(tmp));
69656972
m_switchover_buckets.bitOR(tmp);
69666973
c_startup.m_handover_nodes.clear(nodeId);
69676974
DBUG_VOID_RETURN;
@@ -6987,6 +6994,7 @@ Suma::execSUMA_HANDOVER_CONF(Signal* signal) {
69876994
nodeId, gci, buf, c_no_of_buckets);
69886995
g_eventLogger->info("Suma: handover to node %u gci: %u buckets: %s (%u)",
69896996
nodeId, gci, buf, c_no_of_buckets);
6997+
m_active_buckets.bitANDC(tmp);
69906998
m_switchover_buckets.bitOR(tmp);
69916999
c_startup.m_handover_nodes.clear(nodeId);
69927000
DBUG_VOID_RETURN;
@@ -7417,6 +7425,7 @@ Suma::start_resend(Signal* signal, Uint32 buck)
74177425
bucket->m_switchover_node = get_responsible_node(buck);
74187426
bucket->m_switchover_gci = max;
74197427

7428+
ndbassert(!m_active_buckets.get(buck));
74207429
m_switchover_buckets.set(buck);
74217430

74227431
signal->theData[0] = SumaContinueB::RESEND_BUCKET;

0 commit comments

Comments
 (0)