Skip to content

Commit e4a61d4

Browse files
committed
Bug#36886242 Node failure late in Index Statistics Update schema transaction causes president failure

Problem

The Index Statistics UPDATE_STAT Schema Transaction can be generated automatically by Ndb data nodes in response to monitored data change rates, triggered as part of schema changes, or requested manually by ANALYZE TABLE et al.

Most Schema Transactions (ST) involve all data nodes equally, so the survivable loss of one data node does not affect the outcome and the surviving nodes carry on. The UPDATE_STAT ST has phases operating on only one node, so the failure of that node does affect the overall outcome. This is handled by requiring that any node failure during the UPDATE_STAT ST results in the transaction being rolled back.

However, this mechanism did not handle the case where the UPDATE_STAT ST was already in the short final commit or complete phases. In that case it caused an assertion failure at the Master/President node. That behaves as a cascading node failure - one node failure leading to another - which endangers availability, especially if the nodes involved are in the same nodegroup.

Solution

The UPDATE_STAT ST rollback-on-node-failure behaviour is limited to the pre-commit ST states. This avoids the assertion failure and cascading node failures.

Testing

A new testcase is added: testDict -n IndexStatNodeFailures T1

Change-Id: I5915087fa5d24b50e11bb5697e93400443a367d3
1 parent e706ffe commit e4a61d4

File tree

5 files changed

+131
-6
lines changed

5 files changed

+131
-6
lines changed

storage/ndb/src/kernel/blocks/ERROR_codes.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ Next NDBFS 2003
2727
Next DBACC 3006
2828
Next DBTUP 4039
2929
Next DBLQH 5112
30-
Next DBDICT 6223
30+
Next DBDICT 6227
3131
Next DBDIH 7251
3232
Next DBTC 8124
3333
Next TRPMAN 9007

storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16839,6 +16839,8 @@ Dbdict::indexStat_prepare(Signal* signal, SchemaOpPtr op_ptr)
1683916839

1684016840
D("indexStat_prepare" << V(*op_ptr.p));
1684116841

16842+
CRASH_INSERTION(6224);
16843+
1684216844
if (impl_req->requestType == IndexStatReq::RT_UPDATE_STAT ||
1684316845
impl_req->requestType == IndexStatReq::RT_DELETE_STAT) {
1684416846
// the main op of stat update or delete does nothing
@@ -16895,7 +16897,11 @@ Dbdict::indexStat_toLocalStat(Signal* signal, SchemaOpPtr op_ptr)
1689516897

1689616898
switch (impl_req->requestType) {
1689716899
case IndexStatReq::RT_SCAN_FRAG:
16898-
trans_ptr.p->m_abort_on_node_fail = true;
16900+
/**
16901+
* Node failure during prepare phase should result in ST rollback
16902+
* to handle the case where the single scanning node has failed
16903+
*/
16904+
trans_ptr.p->m_abort_on_node_fail_pre_commit = true;
1689916905
req->fragId = indexPtr.p->indexStatFragId;
1690016906
if (!do_action(trans_ptr.p->m_nodes, indexPtr.p->indexStatNodes,
1690116907
getOwnNodeId()))
@@ -16990,6 +16996,7 @@ Dbdict::indexStat_commit(Signal* signal, SchemaOpPtr op_ptr)
1699016996
IndexStatRecPtr indexStatPtr;
1699116997
getOpRec(op_ptr, indexStatPtr);
1699216998
D("indexStat_commit" << *op_ptr.p);
16999+
CRASH_INSERTION(6225);
1699317000
sendTransConf(signal, op_ptr);
1699417001
}
1699517002

@@ -17002,6 +17009,7 @@ Dbdict::indexStat_complete(Signal* signal, SchemaOpPtr op_ptr)
1700217009
IndexStatRecPtr indexStatPtr;
1700317010
getOpRec(op_ptr, indexStatPtr);
1700417011
D("indexStat_complete" << *op_ptr.p);
17012+
CRASH_INSERTION(6226);
1700517013
sendTransConf(signal, op_ptr);
1700617014
}
1700717015

@@ -30383,15 +30391,37 @@ Dbdict::execSCHEMA_TRANS_IMPL_REF(Signal* signal)
3038330391
jam();
3038430392
// trans_ptr.p->m_nodes.clear(nodeId);
3038530393
// No need to clear, will be cleared when next REQ is set
30386-
if (!trans_ptr.p->m_abort_on_node_fail)
30394+
if (!trans_ptr.p->m_abort_on_node_fail_pre_commit)
3038730395
{
3038830396
jam();
3038930397
ref->errorCode = 0;
3039030398
}
3039130399
else
3039230400
{
3039330401
jam();
30394-
ref->errorCode = SchemaTransBeginRef::Nodefailure;
30402+
/* Abort on node fail if in pre-commit phase */
30403+
switch (trans_ptr.p->m_state) {
30404+
case SchemaTrans::TS_FLUSH_COMMIT:
30405+
jam();
30406+
/* fallthrough */;
30407+
case SchemaTrans::TS_COMMITTING:
30408+
jam();
30409+
/* fallthrough */;
30410+
case SchemaTrans::TS_FLUSH_COMPLETE:
30411+
jam();
30412+
/* fallthrough */;
30413+
case SchemaTrans::TS_COMPLETING:
30414+
jam();
30415+
/* fallthrough */;
30416+
case SchemaTrans::TS_ENDING:
30417+
jam();
30418+
/* Ignore */
30419+
ref->errorCode = 0;
30420+
break;
30421+
default:
30422+
ref->errorCode = SchemaTransBeginRef::Nodefailure;
30423+
break;
30424+
}
3039530425
}
3039630426
}
3039730427

storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2380,7 +2380,7 @@ class Dbdict: public SimulatedBlock {
23802380
bool m_flush_complete;
23812381
bool m_flush_end;
23822382
bool m_wait_gcp_on_commit;
2383-
bool m_abort_on_node_fail;
2383+
bool m_abort_on_node_fail_pre_commit;
23842384

23852385
// magic is on when record is seized
23862386
enum { DICT_MAGIC = ~RT_DBDICT_SCHEMA_TRANSACTION };
@@ -2406,7 +2406,7 @@ class Dbdict: public SimulatedBlock {
24062406
m_flush_complete = false;
24072407
m_flush_end = false;
24082408
m_wait_gcp_on_commit = true;
2409-
m_abort_on_node_fail = false;
2409+
m_abort_on_node_fail_pre_commit = false;
24102410
}
24112411

24122412
SchemaTrans(Uint32 the_trans_key) {

storage/ndb/test/ndbapi/testDict.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11745,6 +11745,89 @@ runCreateManyDataFiles(NDBT_Context* ctx, NDBT_Step* step)
1174511745
return result;
1174611746
}
1174711747

11748+
/**
 * Test step: insert crash errors into a data node and then request an
 * index statistics update for the "<table>_idx" index of the context table.
 *
 * For each scenario (error inserted in the master node, then in a non-master
 * node) and for each error code (6224, 6225, 6226) the step:
 *   1. inserts the error in the selected node,
 *   2. calls updateIndexStat(), retrying while it fails with one of the
 *      error codes expected as a consequence of a node failure,
 *   3. waits for the cluster to be started again.
 *
 * @param ctx   Test context, supplies the table under test.
 * @param step  Test step, supplies the Ndb object.
 * @return NDBT_OK on success, NDBT_FAILED on any unexpected error.
 */
int runIndexStatNF(NDBT_Context *ctx, NDBT_Step *step) {
  NdbRestarter restarter;
  Ndb *pNdb = GETNDB(step);
  NdbDictionary::Dictionary *pDict = pNdb->getDictionary();
  const NdbDictionary::Table *pTab = ctx->getTab();
  if (pTab == NULL) {
    ndbout << "Failed to get table " << endl;
    return NDBT_FAILED;
  }

  /*
   * Build the index name with bounded formatting: an unbounded sprintf()
   * into a small fixed buffer would overrun it for long table names.
   */
  char idxname[256];
  const int idxnameLen =
      snprintf(idxname, sizeof(idxname), "%s_idx", pTab->getName());
  if (idxnameLen < 0 || idxnameLen >= static_cast<int>(sizeof(idxname))) {
    ndbout << "Index name too long for table " << pTab->getName() << endl;
    return NDBT_FAILED;
  }

  const NdbDictionary::Index *pIdx = pDict->getIndex(idxname, pTab->getName());
  if (pIdx == NULL) {
    ndbout << "Failed to get index" << idxname << " error "
           << pDict->getNdbError() << endl;
    return NDBT_FAILED;
  }

  /**
   * Test node failures while update index stats schema transaction
   * is in various phases (prepare, commit, complete)
   * Test node failures of :
   *   Master node (requiring DICT SchemaTrans takeover)
   *   Non master node (requiring node failure handling)
   */
  /* Fail in index stat ST prepare, commit, complete */
  const int nfcases[] = {6224, 6225, 6226};
  const int numCases = sizeof(nfcases) / sizeof(nfcases[0]);

  for (int s = 0; s < 2; s++) {
    const char *sname = (s == 0) ? "Master" : "Non-master";

    ndbout_c("Scenario %d, inserting errors in %s node", s, sname);

    for (int c = 0; c < numCases; c++) {
      const int errorCode = nfcases[c];
      /* Re-select the node each time: the previous case may have changed
       * which node is master. */
      const int errorNodeId = restarter.getNode(
          (s == 0) ? NdbRestarter::NS_MASTER : NdbRestarter::NS_NON_MASTER);
      /* NOTE(review): getNode() is assumed to return a negative value when
       * no matching node exists (e.g. single data node cluster) - confirm
       * against NdbRestarter. */
      if (errorNodeId < 0) {
        ndbout_c("Failed to find %s node", sname);
        return NDBT_FAILED;
      }
      ndbout_c("Inserting error %u in %s node %u", errorCode, sname,
               errorNodeId);

      if (restarter.insertErrorInNode(errorNodeId, errorCode) != 0) {
        ndbout_c("Failed to insert error");
        return NDBT_FAILED;
      }

      ndbout_c("Requesting updated index stats");
      for (;;) {
        if (pDict->updateIndexStat(*pIdx, *pTab) == 0) {
          ndbout_c("Success generating new index stats");
          break;
        }

        const NdbError err = pDict->getNdbError();

        ndbout_c("Index stat update failed with error %u %s", err.code,
                 err.message);

        /* Only errors that are an expected side effect of the provoked
         * node failure are retried; anything else fails the test. */
        const bool retryable =
            (err.code == 781 || /* Invalid schema transaction key */
             err.code == 286 || /* Node failure caused abort */
             err.code == 787);  /* Schema transaction aborted */
        if (!retryable) {
          return NDBT_FAILED;
        }
        ndbout_c("Retrying");
      }

      ndbout_c("Wait for all data nodes to be running");

      if (restarter.waitClusterStarted() != 0) {
        ndbout_c("Timed out waiting for nodes to start");
        return NDBT_FAILED;
      }
    }
  }

  ndbout_c("Success");

  return NDBT_OK;
}
11830+
1174811831
NDBT_TESTSUITE(testDict);
1174911832
TESTCASE("testDropDDObjects",
1175011833
"* 1. start cluster\n"
@@ -12179,6 +12262,14 @@ TESTCASE("CreateManyDataFiles", "Test lack of DiskPageBufferMemory "
1217912262
FINALIZER(runDropTableSpaceLG);
1218012263
FINALIZER(changeStartDiskPageBufMem);
1218112264
}
12265+
/**
 * Registers the "IndexStatNodeFailures" test case in the testDict suite.
 * runCreateTheTable and runCreateTheIndex are registered as initializers,
 * runIndexStatNF as the single test step, and runDropTheIndex and
 * runDropTheTable as finalizers.
 */
TESTCASE("IndexStatNodeFailures",
12266+
"Test node failures in various phases of index stat updates") {
12267+
INITIALIZER(runCreateTheTable);
12268+
INITIALIZER(runCreateTheIndex);
12269+
STEP(runIndexStatNF);
12270+
FINALIZER(runDropTheIndex);
12271+
FINALIZER(runDropTheTable);
12272+
}
1218212273

1218312274
NDBT_TESTSUITE_END(testDict);
1218412275

storage/ndb/test/run-test/daily-devel--07-tests.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,3 +289,7 @@ cmd: test_event
289289
args: -n MergeEventConsumer_Graceful T1 --loops=3
290290
max-time: 480
291291

292+
cmd: testDict
293+
args: -n IndexStatNodeFailures T1
294+
max-time: 360
295+

0 commit comments

Comments
 (0)