From 8d6bd1edff03081e84920e3122639c458fa044bd Mon Sep 17 00:00:00 2001 From: Mark Callaghan Date: Wed, 19 Jul 2023 16:24:32 -0700 Subject: [PATCH 1/2] Add rocksdb_block_cache_numshardbits for issue 1336 This fixes https://github.com/facebook/mysql-5.6/issues/1336 The default value is -1 to match existing behavior. When the value is -1, RocksDB determines the number of block cache shard bits as min(6, floor(log2(rocksdb_block_cache_size / min_shard_size))), i.e. at most 2^6 = 64 shards; today min_shard_size is 512K for LRU and 32M for Hyper. The math above frequently results in a block cache with too many small shards when rocksdb_block_cache_size is not too big (a few GB is not too big), and the resulting perf problems are hard to debug. --- mysql-test/suite/rocksdb/r/rocksdb.result | 1 + .../rocksdb_block_cache_numshardbits_basic.result | 7 +++++++ .../t/rocksdb_block_cache_numshardbits_basic.test | 6 ++++++ storage/rocksdb/ha_rocksdb.cc | 15 ++++++++++++--- 4 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 mysql-test/suite/rocksdb_sys_vars/r/rocksdb_block_cache_numshardbits_basic.result create mode 100644 mysql-test/suite/rocksdb_sys_vars/t/rocksdb_block_cache_numshardbits_basic.test diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result index 4afeb59e39b2..31d2f5d400ce 100644 --- a/mysql-test/suite/rocksdb/r/rocksdb.result +++ b/mysql-test/suite/rocksdb/r/rocksdb.result @@ -907,6 +907,7 @@ rocksdb_binlog_ttl OFF rocksdb_binlog_ttl_compaction_ts_interval_secs 3600 rocksdb_binlog_ttl_compaction_ts_offset_secs 60 rocksdb_blind_delete_primary_key OFF +rocksdb_block_cache_numshardbits -1 rocksdb_block_cache_size 536870912 rocksdb_block_restart_interval 16 rocksdb_block_size 4096 diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_block_cache_numshardbits_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_block_cache_numshardbits_basic.result new file mode 100644 index 000000000000..a30547b99e54 --- /dev/null +++ 
b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_block_cache_numshardbits_basic.result @@ -0,0 +1,7 @@ +SET @start_global_value = @@global.ROCKSDB_BLOCK_CACHE_NUMSHARDBITS; +SELECT @start_global_value; +@start_global_value +-1 +"Trying to set variable @@global.ROCKSDB_BLOCK_CACHE_NUMSHARDBITS to 444. It should fail because it is readonly." +SET @@global.ROCKSDB_BLOCK_CACHE_NUMSHARDBITS = 444; +ERROR HY000: Variable 'rocksdb_block_cache_numshardbits' is a read only variable diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_block_cache_numshardbits_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_block_cache_numshardbits_basic.test new file mode 100644 index 000000000000..924c7abe11fc --- /dev/null +++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_block_cache_numshardbits_basic.test @@ -0,0 +1,6 @@ +--source include/have_rocksdb.inc + +--let $sys_var=ROCKSDB_BLOCK_CACHE_NUMSHARDBITS +--let $read_only=1 +--let $session=0 +--source ../include/rocksdb_sys_var.inc diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 96268affc96c..3df683537b65 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -920,6 +920,7 @@ static long long rocksdb_compaction_sequential_deletes_file_size = 0l; static uint32_t rocksdb_validate_tables = 1; char *rocksdb_datadir; static uint32_t rocksdb_max_bottom_pri_background_compactions = 0; +static int32_t rocksdb_block_cache_numshardbits = -1; static uint32_t rocksdb_table_stats_sampling_pct; static uint32_t rocksdb_table_stats_recalc_threshold_pct = 10; static unsigned long long rocksdb_table_stats_recalc_threshold_count = 100ul; @@ -2052,6 +2053,13 @@ static MYSQL_SYSVAR_INT(table_cache_numshardbits, // fails to create a cache and returns a nullptr /* min */ 0, /* max */ 19, 0); +static MYSQL_SYSVAR_INT(block_cache_numshardbits, + rocksdb_block_cache_numshardbits, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Block cache numshardbits for RocksDB", + nullptr, nullptr, + /* 
default */ -1, /* min */ -1, /* max */ 8, 0); + static MYSQL_SYSVAR_UINT64_T(wal_ttl_seconds, rocksdb_db_options->WAL_ttl_seconds, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -3039,6 +3047,7 @@ static struct SYS_VAR *rocksdb_system_variables[] = { MYSQL_SYSVAR(keep_log_file_num), MYSQL_SYSVAR(max_manifest_file_size), MYSQL_SYSVAR(table_cache_numshardbits), + MYSQL_SYSVAR(block_cache_numshardbits), MYSQL_SYSVAR(wal_ttl_seconds), MYSQL_SYSVAR(wal_size_limit_mb), MYSQL_SYSVAR(manifest_preallocation_size), @@ -7952,13 +7961,13 @@ static int rocksdb_init_internal(void *const p) { rocksdb_use_hyper_clock_cache ? rocksdb::HyperClockCacheOptions( rocksdb_block_cache_size, rocksdb_tbl_options->block_size, - -1 - /* num_shard_bits */, + rocksdb_block_cache_numshardbits /* num_shard_bits */, false /* strict_capacity_limit */, memory_allocator) .MakeSharedCache() : rocksdb::NewLRUCache( - rocksdb_block_cache_size, -1 /*num_shard_bits*/, + rocksdb_block_cache_size, + rocksdb_block_cache_numshardbits /*num_shard_bits*/, false /*strict_capcity_limit*/, rocksdb_cache_high_pri_pool_ratio, memory_allocator); if (rocksdb_sim_cache_size > 0) { From f4aecdd1222eff1b561fcbcb6db4e86b1d25b0d5 Mon Sep 17 00:00:00 2001 From: Mark Callaghan Date: Thu, 20 Jul 2023 10:14:32 -0700 Subject: [PATCH 2/2] Address review comments for rocksdb_block_cache_numshardbits Declare the variable as int (it is registered with MYSQL_SYSVAR_INT), reformat the sysvar definition and the block cache construction, drop the now-redundant num_shard_bits comments, and add the new option to mysqld--help-notwin.result. --- mysql-test/r/mysqld--help-notwin.result | 3 +++ storage/rocksdb/ha_rocksdb.cc | 13 ++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/mysql-test/r/mysqld--help-notwin.result b/mysql-test/r/mysqld--help-notwin.result index 13f9a42ea444..1319f4323e85 100644 --- a/mysql-test/r/mysqld--help-notwin.result +++ b/mysql-test/r/mysqld--help-notwin.result @@ -1855,6 +1855,8 @@ The following options may be given as the first argument: Deleting rows by primary key lookup, without reading rows (Blind Deletes). 
Blind delete is disabled if the table has secondary key + --rocksdb-block-cache-numshardbits=# + Block cache numshardbits for RocksDB --rocksdb-block-cache-size=# block_cache size for RocksDB --rocksdb-block-restart-interval=# @@ -3553,6 +3555,7 @@ rocksdb-binlog-ttl FALSE rocksdb-binlog-ttl-compaction-ts-interval-secs 3600 rocksdb-binlog-ttl-compaction-ts-offset-secs 60 rocksdb-blind-delete-primary-key FALSE +rocksdb-block-cache-numshardbits -1 rocksdb-block-cache-size 536870912 rocksdb-block-restart-interval 16 rocksdb-block-size 4096 diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 3df683537b65..043d2333709d 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -920,7 +920,7 @@ static long long rocksdb_compaction_sequential_deletes_file_size = 0l; static uint32_t rocksdb_validate_tables = 1; char *rocksdb_datadir; static uint32_t rocksdb_max_bottom_pri_background_compactions = 0; -static int32_t rocksdb_block_cache_numshardbits = -1; +static int rocksdb_block_cache_numshardbits = -1; static uint32_t rocksdb_table_stats_sampling_pct; static uint32_t rocksdb_table_stats_recalc_threshold_pct = 10; static unsigned long long rocksdb_table_stats_recalc_threshold_count = 100ul; @@ -2056,9 +2056,9 @@ static MYSQL_SYSVAR_INT(table_cache_numshardbits, static MYSQL_SYSVAR_INT(block_cache_numshardbits, rocksdb_block_cache_numshardbits, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Block cache numshardbits for RocksDB", - nullptr, nullptr, - /* default */ -1, /* min */ -1, /* max */ 8, 0); + "Block cache numshardbits for RocksDB", nullptr, + nullptr, + /* default */ -1, /* min */ -1, /* max */ 8, 0); static MYSQL_SYSVAR_UINT64_T(wal_ttl_seconds, rocksdb_db_options->WAL_ttl_seconds, @@ -7961,13 +7961,12 @@ static int rocksdb_init_internal(void *const p) { rocksdb_use_hyper_clock_cache ? 
rocksdb::HyperClockCacheOptions( rocksdb_block_cache_size, rocksdb_tbl_options->block_size, - rocksdb_block_cache_numshardbits /* num_shard_bits */, + rocksdb_block_cache_numshardbits, false /* strict_capacity_limit */, memory_allocator) .MakeSharedCache() : rocksdb::NewLRUCache( - rocksdb_block_cache_size, - rocksdb_block_cache_numshardbits /*num_shard_bits*/, + rocksdb_block_cache_size, rocksdb_block_cache_numshardbits, false /*strict_capcity_limit*/, rocksdb_cache_high_pri_pool_ratio, memory_allocator); if (rocksdb_sim_cache_size > 0) {