Skip to content

Commit c3576fa

Browse files
nacarvalho authored and dahlerlend committed
BUG#37613510: Ever growing GR Transactions Rows Validating after secondary joins the group
The Group Replication start operation checks whether there are partial transactions on the `group_replication_applier` channel from a previous group participation. If partial transactions are found, the `group_replication_applier` channel is stopped after applying all complete transactions, its relay logs are purged, and then the channel is restarted. After this step, distributed recovery kicks in and applies the missing data from a group member. The Group Replication pipeline operation that stops the `group_replication_applier` channel was incorrectly also stopping the periodic task of the certifier module (the certifier broadcast thread), so some periodic internal operations were no longer taking place. One of the tasks that stopped happening was the periodic send of the committed transactions; its omission prevented the certification info garbage collection, which in turn caused the continuous increase of the column COUNT_TRANSACTIONS_ROWS_VALIDATING of the table performance_schema.replication_group_member_stats. To solve the above issue, the pipeline operation that stops the `group_replication_applier` channel no longer interferes with the certifier module. Change-Id: I1a4c2f7a5b6d0ca65caf43eeae38103a17b2d5ec
1 parent 4840244 commit c3576fa

File tree

6 files changed

+30
-32
lines changed

6 files changed

+30
-32
lines changed

mysql-test/suite/group_replication/r/gr_partial_trx_in_applier_relay_log.result

+1
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ INSERT INTO t1 values (12);
4040
SET GLOBAL DEBUG="-d,stop_applier_channel_after_reading_write_rows_log_event";
4141
include/stop_group_replication.inc
4242
include/start_group_replication.inc
43+
include/assert.inc [Certifier broadcast thread must be running]
4344
########################################################################
4445
# 5. On M1: Insert another tuple and do a diff tables with other nodes.
4546
# (just to check that everything is working fine).

mysql-test/suite/group_replication/t/gr_partial_trx_in_applier_relay_log.test

+4
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,10 @@ SET GLOBAL DEBUG="-d,stop_applier_channel_after_reading_write_rows_log_event";
9393
--let $wait_timeout=120
9494
--source include/start_group_replication.inc
9595

96+
--let $assert_text= Certifier broadcast thread must be running
97+
--let $assert_cond= [SELECT COUNT(*) FROM performance_schema.threads WHERE name = "thread/group_rpl/THD_certifier_broadcast"] = 1
98+
--source include/assert.inc
99+
96100
--echo ########################################################################
97101
--echo # 5. On M1: Insert another tuple and do a diff tables with other nodes.
98102
--echo # (just to check that everything is working fine).

plugin/group_replication/include/certifier.h

+1-14
Original file line number | Diff line number | Diff line change
@@ -169,12 +169,8 @@ class Certifier_broadcast_thread {
169169

170170
/**
171171
Terminate broadcast thread.
172-
173-
@return the operation status
174-
@retval 0 OK
175-
@retval !=0 Error
176172
*/
177-
int terminate();
173+
void terminate();
178174

179175
/**
180176
Broadcast thread worker method.
@@ -268,15 +264,6 @@ class Certifier : public Certifier_interface {
268264
*/
269265
int initialize(ulonglong gtid_assignment_block_size);
270266

271-
/**
272-
Terminate certifier.
273-
274-
@return the operation status
275-
@retval 0 OK
276-
@retval !=0 Error
277-
*/
278-
int terminate();
279-
280267
/**
281268
Handle view changes on certifier.
282269
*/

plugin/group_replication/src/certifier.cc

+15-16
Original file line number | Diff line number | Diff line change
@@ -92,8 +92,11 @@ int Certifier_broadcast_thread::initialize() {
9292
if ((mysql_thread_create(key_GR_THD_cert_broadcast, &broadcast_pthd,
9393
get_connection_attrib(), launch_broadcast_thread,
9494
(void *)this))) {
95-
mysql_mutex_unlock(&broadcast_run_lock); /* purecov: inspected */
96-
return 1; /* purecov: inspected */
95+
/* purecov: begin inspected */
96+
mysql_mutex_unlock(&broadcast_run_lock);
97+
LogPluginErr(ERROR_LEVEL, ER_GRP_RPL_CERT_BROADCAST_THREAD_CREATE_FAILED);
98+
return 1;
99+
/* purecov: end */
97100
}
98101
broadcast_thd_state.set_created();
99102

@@ -108,13 +111,13 @@ int Certifier_broadcast_thread::initialize() {
108111
return 0;
109112
}
110113

111-
int Certifier_broadcast_thread::terminate() {
114+
void Certifier_broadcast_thread::terminate() {
112115
DBUG_TRACE;
113116

114117
mysql_mutex_lock(&broadcast_run_lock);
115118
if (broadcast_thd_state.is_thread_dead()) {
116119
mysql_mutex_unlock(&broadcast_run_lock);
117-
return 0;
120+
return;
118121
}
119122

120123
aborted = true;
@@ -135,8 +138,6 @@ int Certifier_broadcast_thread::terminate() {
135138
mysql_cond_timedwait(&broadcast_run_cond, &broadcast_run_lock, &abstime);
136139
}
137140
mysql_mutex_unlock(&broadcast_run_lock);
138-
139-
return 0;
140141
}
141142

142143
void Certifier_broadcast_thread::dispatcher() {
@@ -156,6 +157,8 @@ void Certifier_broadcast_thread::dispatcher() {
156157
mysql_cond_broadcast(&broadcast_run_cond);
157158
mysql_mutex_unlock(&broadcast_run_lock);
158159

160+
LogPluginErr(SYSTEM_LEVEL, ER_GRP_RPL_CERT_BROADCAST_THREAD_STARTED);
161+
159162
while (!aborted) {
160163
// Increase Group Replication feature usage every 10 minutes.
161164
if (broadcast_counter % 600 == 0 ||
@@ -216,6 +219,8 @@ void Certifier_broadcast_thread::dispatcher() {
216219
mysql_cond_broadcast(&broadcast_run_cond);
217220
mysql_mutex_unlock(&broadcast_run_lock);
218221

222+
LogPluginErr(SYSTEM_LEVEL, ER_GRP_RPL_CERT_BROADCAST_THREAD_STOPPED);
223+
219224
my_thread_exit(nullptr);
220225
}
221226

@@ -324,6 +329,10 @@ Certifier::Certifier()
324329
Certifier::~Certifier() {
325330
mysql_mutex_lock(&LOCK_certification_info);
326331
initialized = false;
332+
333+
broadcast_thread->terminate();
334+
delete broadcast_thread;
335+
327336
clear_certification_info();
328337
delete certification_info_tsid_map;
329338

@@ -334,7 +343,6 @@ Certifier::~Certifier() {
334343
delete group_gtid_extracted;
335344
delete group_gtid_tsid_map;
336345
mysql_mutex_unlock(&LOCK_certification_info);
337-
delete broadcast_thread;
338346

339347
mysql_mutex_lock(&LOCK_members);
340348
clear_members();
@@ -566,15 +574,6 @@ int Certifier::initialize(ulonglong gtid_assignment_block_size) {
566574
return error;
567575
}
568576

569-
int Certifier::terminate() {
570-
DBUG_TRACE;
571-
int error = 0;
572-
573-
if (is_initialized()) error = broadcast_thread->terminate();
574-
575-
return error;
576-
}
577-
578577
void Certifier::update_parallel_applier_indexes(
579578
bool update_parallel_applier_last_committed_global,
580579
bool increment_parallel_applier_sequence_number) {

plugin/group_replication/src/handlers/certification_handler.cc

-2
Original file line number | Diff line number | Diff line change
@@ -100,8 +100,6 @@ int Certification_handler::handle_action(Pipeline_action *action) {
100100
Handler_THD_setup_action *thd_conf_action =
101101
(Handler_THD_setup_action *)action;
102102
applier_module_thd = thd_conf_action->get_THD_object();
103-
} else if (action_type == HANDLER_STOP_ACTION) {
104-
error = cert_module->terminate();
105103
}
106104

107105
if (error) return error;

share/messages_to_error_log.txt

+9
Original file line number | Diff line number | Diff line change
@@ -12330,6 +12330,15 @@ ER_CHECK_TABLE_FUNCTIONS
1233012330
ER_CHECK_TABLE_FUNCTIONS_DETAIL
1233112331
eng "%s"
1233212332

12333+
ER_GRP_RPL_CERT_BROADCAST_THREAD_CREATE_FAILED
12334+
eng "Failed to create the Group Replication certifier broadcast thread (THD_certifier_broadcast)."
12335+
12336+
ER_GRP_RPL_CERT_BROADCAST_THREAD_STARTED
12337+
eng "The Group Replication certifier broadcast thread (THD_certifier_broadcast) started."
12338+
12339+
ER_GRP_RPL_CERT_BROADCAST_THREAD_STOPPED
12340+
eng "The Group Replication certifier broadcast thread (THD_certifier_broadcast) stopped."
12341+
1233312342
#
1233412343
# End of 8.0 error messages intended to be written to the server error log.
1233512344
#

0 commit comments

Comments
 (0)