Commit 5228410

Enabled overall cpu stats collection and summary for local and remote runs (#314)
* Enabled long-running benchmarks (with watchdog enforcing deletion) via timeout_secods property on benchmark definition
* Fixes per flake8 review
* Enabled overall cpu stats collection and summary for local and remote runs
* Tracking cpu usage as close as possible to start/stop time of benchmark
1 parent 03f688d commit 5228410

9 files changed: +197, -22 lines

pyproject.toml (+1, -1)

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "redisbench-admin"
-version = "0.7.12"
+version = "0.7.13"
 description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
 authors = ["filipecosta90 <[email protected]>","Redis Performance Group <[email protected]>"]
 readme = "README.md"

redisbench_admin/environments/oss_cluster.py (+1)

@@ -48,6 +48,7 @@ def spin_up_local_redis_cluster(
         result = wait_for_conn(r, dataset_load_timeout_secs)
         if result is True:
             logging.info("Redis available. pid={}".format(redis_process.pid))
+            r.client_setname("redisbench-admin-cluster-#{}".format(master_shard_id))
         redis_conns.append(r)
         redis_processes.append(redis_process)
     return redis_processes, redis_conns

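For context, tagging each shard connection with CLIENT SETNAME makes the helper's connections easy to spot on the server side. A minimal sketch using redis-py against a local server (the port and connection name below are illustrative):

```python
import redis

# Name the connection the same way spin_up_local_redis_cluster now does
r = redis.Redis(port=6379)
r.ping()
r.client_setname("redisbench-admin-cluster-#1")

# The name then shows up in CLIENT LIST, so benchmark connections are easy to identify
for client in r.client_list():
    if client.get("name", "").startswith("redisbench-admin"):
        print(client["addr"], client["name"])
```
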
redisbench_admin/run/common.py (+20, -5)

@@ -372,11 +372,19 @@ def check_dbconfig_keyspacelen_requirement(
     required = False
     keyspacelen = None
     if dbconfig_keyname in benchmark_config:
-        for k in benchmark_config[dbconfig_keyname]:
-            if "check" in k:
-                if "keyspacelen" in k["check"]:
+        if type(benchmark_config[dbconfig_keyname]) == list:
+            for k in benchmark_config[dbconfig_keyname]:
+                if "check" in k:
+                    if "keyspacelen" in k["check"]:
+                        required = True
+                        keyspacelen = int(k["check"]["keyspacelen"])
+        if type(benchmark_config[dbconfig_keyname]) == dict:
+            if "check" in benchmark_config[dbconfig_keyname]:
+                if "keyspacelen" in benchmark_config[dbconfig_keyname]["check"]:
                     required = True
-                    keyspacelen = int(k["check"]["keyspacelen"])
+                    keyspacelen = int(
+                        benchmark_config[dbconfig_keyname]["check"]["keyspacelen"]
+                    )
     return required, keyspacelen


@@ -626,7 +634,12 @@ def common_properties_log(


 def print_results_table_stdout(
-    benchmark_config, default_metrics, results_dict, setup_name, test_name
+    benchmark_config,
+    default_metrics,
+    results_dict,
+    setup_name,
+    test_name,
+    cpu_usage=None,
 ):
     # check which metrics to extract
     (_, metrics,) = merge_default_and_config_metrics(
@@ -640,6 +653,8 @@ def print_results_table_stdout(
         "Metric Value",
     ]
     results_matrix = extract_results_table(metrics, results_dict)
+    if cpu_usage is not None:
+        results_matrix.append(["Total shards CPU usage %", "", "", cpu_usage])
     results_matrix = [[x[0], "{:.3f}".format(x[3])] for x in results_matrix]
     writer = MarkdownTableWriter(
         table_name=table_name,

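The first hunk above makes the keyspacelen requirement check accept a dbconfig section written either as a list of single-purpose entries or as a plain mapping. A minimal sketch of the two shapes, with the lookup logic mirrored inline (the config fragments are illustrative, not taken from a real test definition):

```python
# dbconfig given as a list of single-key entries (the previously supported shape)
config_list_style = {"dbconfig": [{"check": {"keyspacelen": 1000}}]}

# dbconfig given as a plain mapping (the shape this commit starts accepting)
config_dict_style = {"dbconfig": {"check": {"keyspacelen": 1000}}}


def keyspacelen_requirement(benchmark_config, dbconfig_keyname="dbconfig"):
    """Mirror of check_dbconfig_keyspacelen_requirement's lookup logic."""
    required, keyspacelen = False, None
    section = benchmark_config.get(dbconfig_keyname)
    if isinstance(section, list):
        for entry in section:
            if "keyspacelen" in entry.get("check", {}):
                required, keyspacelen = True, int(entry["check"]["keyspacelen"])
    elif isinstance(section, dict):
        if "keyspacelen" in section.get("check", {}):
            required, keyspacelen = True, int(section["check"]["keyspacelen"])
    return required, keyspacelen


print(keyspacelen_requirement(config_list_style))  # (True, 1000)
print(keyspacelen_requirement(config_dict_style))  # (True, 1000)
```
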
redisbench_admin/run/metrics.py (+48)

@@ -129,3 +129,51 @@ def collect_redis_metrics(
             kv_overall[metric_name] = metric_value

     return start_time_ms, res, kv_overall
+
+
+def from_info_to_overall_shard_cpu(benchmark_cpu_stats):
+    total_avg_cpu_pct = 0.0
+    res = {}
+    for shard_n, cpu_stats_arr in benchmark_cpu_stats.items():
+        # we need at least 2 elements to compute the cpu usage
+        if len(cpu_stats_arr) < 2:
+            avg_cpu_pct = None
+        else:
+            start_ts_micros = cpu_stats_arr[0]["server_time_usec"]
+            start_total_cpu = get_total_cpu(cpu_stats_arr[0])
+            end_ts_micros = cpu_stats_arr[len(cpu_stats_arr) - 1]["server_time_usec"]
+            end_total_cpu = get_total_cpu(cpu_stats_arr[len(cpu_stats_arr) - 1])
+            total_secs = (end_ts_micros - start_ts_micros) / 1000000
+            total_cpu_usage = end_total_cpu - start_total_cpu
+            avg_cpu_pct = 100.0 * (total_cpu_usage / total_secs)
+        res[shard_n] = avg_cpu_pct
+        total_avg_cpu_pct += avg_cpu_pct
+    return total_avg_cpu_pct, res
+
+
+def get_total_cpu(info_data):
+    total_cpu = 0.0
+    total_cpu = total_cpu + info_data["used_cpu_sys"]
+    total_cpu = total_cpu + info_data["used_cpu_user"]
+    return total_cpu
+
+
+BENCHMARK_RUNNING_GLOBAL = False
+BENCHMARK_CPU_STATS_GLOBAL = {}
+
+
+def collect_cpu_data(redis_conns=[], delta_secs: float = 5.0, delay_start: float = 1.0):
+    global BENCHMARK_CPU_STATS_GLOBAL
+    global BENCHMARK_RUNNING_GLOBAL
+    import time
+
+    counter = 0
+    time.sleep(delay_start)
+    while BENCHMARK_RUNNING_GLOBAL:
+        for shard_n, redis_conn in enumerate(redis_conns, 1):
+            keyname = "{}".format(shard_n)
+            if keyname not in BENCHMARK_CPU_STATS_GLOBAL:
+                BENCHMARK_CPU_STATS_GLOBAL[keyname] = []
+            BENCHMARK_CPU_STATS_GLOBAL[keyname].append(redis_conn.info())
+        time.sleep(delta_secs)
+        counter += 1

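The summary produced by from_info_to_overall_shard_cpu is a plain rate computation over the first and last INFO snapshots of each shard: the growth of used_cpu_sys + used_cpu_user divided by the elapsed server time. A worked example with hand-written snapshots (the numbers are made up):

```python
# Two INFO snapshots for one shard, 30 s apart (values are illustrative)
first = {"server_time_usec": 1_700_000_000_000_000, "used_cpu_sys": 10.0, "used_cpu_user": 20.0}
last = {"server_time_usec": 1_700_000_030_000_000, "used_cpu_sys": 14.5, "used_cpu_user": 27.5}


def total_cpu(info):
    # same fields that get_total_cpu() sums
    return info["used_cpu_sys"] + info["used_cpu_user"]


elapsed_secs = (last["server_time_usec"] - first["server_time_usec"]) / 1_000_000  # 30.0
cpu_secs = total_cpu(last) - total_cpu(first)  # 12.0 CPU-seconds consumed
avg_cpu_pct = 100.0 * cpu_secs / elapsed_secs  # 40.0 -> the shard averaged 40% of one core
print("{:.3f} %".format(avg_cpu_pct))
```

Because the per-shard percentages are summed, the reported "Total CPU usage" can exceed 100% on multi-shard setups: each fully busy shard contributes up to 100 points (or more, if the shard itself uses several cores).
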
redisbench_admin/run_local/local_db.py (+2)

@@ -117,6 +117,8 @@ def local_db_spin(
     )

     r = redis.Redis(port=args.port)
+    r.ping()
+    r.client_setname("redisbench-admin-stadalone")
     redis_conns.append(r)

     for shardn, redis_process in enumerate(redis_processes):

redisbench_admin/run_local/run_local.py (+32)

@@ -12,6 +12,7 @@
 import traceback
 import redis

+import redisbench_admin.run.metrics
 from redisbench_admin.profilers.profilers_schema import (
     local_profilers_print_artifacts_table,
 )
@@ -23,6 +24,10 @@
     dso_check,
     print_results_table_stdout,
 )
+from redisbench_admin.run.metrics import (
+    from_info_to_overall_shard_cpu,
+    collect_cpu_data,
+)
 from redisbench_admin.run.redistimeseries import datasink_profile_tabular_data
 from redisbench_admin.run.run import (
     calculate_client_tool_duration_and_check,
@@ -51,6 +56,8 @@
 )
 from redisbench_admin.utils.results import post_process_benchmark_results

+import threading
+

 def run_local_command_logic(args, project_name, project_version):
     logging.info(
@@ -159,6 +166,7 @@ def run_local_command_logic(args, project_name, project_version):
             continue
         if setup_type in args.allowed_envs:
             redis_processes = []
+            redis_conns = []
             # after we've spinned Redis, even on error we should always teardown
             # in case of some unexpected error we fail the test
             # noinspection PyBroadException
@@ -270,11 +278,34 @@ def run_local_command_logic(args, project_name, project_version):
                        )

                        # run the benchmark
+                        cpu_stats_thread = threading.Thread(
+                            target=collect_cpu_data,
+                            args=(redis_conns, 5.0, 1.0),
+                        )
+                        redisbench_admin.run.metrics.BENCHMARK_RUNNING_GLOBAL = (
+                            True
+                        )
+                        cpu_stats_thread.start()
                        benchmark_start_time = datetime.datetime.now()
                        stdout, stderr = run_local_benchmark(
                            benchmark_tool, command
                        )
                        benchmark_end_time = datetime.datetime.now()
+                        redisbench_admin.run.metrics.BENCHMARK_RUNNING_GLOBAL = (
+                            False
+                        )
+                        cpu_stats_thread.join()
+                        (
+                            total_shards_cpu_usage,
+                            cpu_usage_map,
+                        ) = from_info_to_overall_shard_cpu(
+                            redisbench_admin.run.metrics.BENCHMARK_CPU_STATS_GLOBAL
+                        )
+                        logging.info(
+                            "Total CPU usage ({:.3f} %)".format(
+                                total_shards_cpu_usage
+                            )
+                        )
                        benchmark_duration_seconds = (
                            calculate_client_tool_duration_and_check(
                                benchmark_end_time, benchmark_start_time
@@ -339,6 +370,7 @@ def run_local_command_logic(args, project_name, project_version):
                            results_dict,
                            setup_name,
                            test_name,
+                            total_shards_cpu_usage,
                        )

                        # check KPIs

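The local runner now brackets the benchmark invocation with the sampling thread as tightly as it can: set the module-level flag, start the thread, run the tool, clear the flag, join, then summarize. A self-contained sketch of that lifecycle, with a hypothetical FakeConn standing in for a real redis.Redis connection so it runs without a server:

```python
import threading
import time

samples = {}     # shard id -> list of INFO-like snapshots
running = False  # plays the role of BENCHMARK_RUNNING_GLOBAL


class FakeConn:
    """Hypothetical stand-in for redis.Redis; info() returns the fields the collector reads."""

    def __init__(self):
        self.cpu = 0.0

    def info(self):
        self.cpu += 0.05  # pretend the shard burns CPU between samples
        return {"server_time_usec": int(time.time() * 1_000_000),
                "used_cpu_sys": self.cpu, "used_cpu_user": self.cpu}


def collect(conns, delta_secs=0.2, delay_start=0.1):
    # same sampling loop shape as collect_cpu_data()
    time.sleep(delay_start)
    while running:
        for shard_n, conn in enumerate(conns, 1):
            samples.setdefault(str(shard_n), []).append(conn.info())
        time.sleep(delta_secs)


conns = [FakeConn()]
t = threading.Thread(target=collect, args=(conns,))
running = True   # flag on before the benchmark starts
t.start()
time.sleep(1.0)  # <- the actual benchmark tool would run here
running = False  # flag off right after the benchmark ends
t.join()
print({k: len(v) for k, v in samples.items()})  # a handful of snapshots per shard
```
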
redisbench_admin/run_remote/remote_client.py (+55, -13)

@@ -5,10 +5,13 @@
 #
 import datetime
 import logging
+import threading

+import redisbench_admin
 from redisbench_admin.run.common import (
     prepare_benchmark_parameters,
 )
+from redisbench_admin.run.metrics import collect_cpu_data
 from redisbench_admin.run.run import calculate_client_tool_duration_and_check
 from redisbench_admin.run_remote.remote_helpers import (
     benchmark_tools_sanity_check,
@@ -46,6 +49,8 @@ def run_remote_client_tool(
     warn_min_duration,
     client_ssh_port,
     private_key,
+    collect_cpu_stats_thread=False,
+    redis_conns=[],
 ):
     (
         benchmark_min_tool_version,
@@ -105,6 +110,7 @@ def run_remote_client_tool(
         tmp = local_bench_fname
         local_bench_fname = "result.csv"
     commands = [command_str]
+    post_commands = []
     if "ann" in benchmark_tool:
         pkg_path = get_ann_remote_pkg_path(
             client_public_ip, client_ssh_port, private_key, username
@@ -132,15 +138,25 @@
         zip_results_command = "cd {} && zip -r {} results/*".format(
             results_outputdir, results_outputdir_zip
         )
-        commands.append(mkdir_command)
-        commands.append(create_website_command)
-        commands.append(zip_website_command)
-        commands.append(zip_results_command)
+        post_commands.append(mkdir_command)
+        post_commands.append(create_website_command)
+        post_commands.append(zip_website_command)
+        post_commands.append(zip_results_command)

         local_output_artifacts.append(website_outputdir_zip_local)
         local_output_artifacts.append(results_outputdir_zip_local)
         remote_output_artifacts.append(website_outputdir_zip)
         remote_output_artifacts.append(results_outputdir_zip)
+    cpu_stats_thread = None
+    if collect_cpu_stats_thread is True:
+        # run the benchmark
+        cpu_stats_thread = threading.Thread(
+            target=collect_cpu_data,
+            args=(redis_conns, 5.0, 1.0),
+        )
+        redisbench_admin.run.metrics.BENCHMARK_RUNNING_GLOBAL = True
+        logging.info("Starting CPU collecing thread")
+        cpu_stats_thread.start()

     benchmark_start_time = datetime.datetime.now()
     # run the benchmark
@@ -154,6 +170,32 @@
         client_ssh_port,
     )
     benchmark_end_time = datetime.datetime.now()
+    if cpu_stats_thread is not None:
+        logging.info("Stopping CPU collecting thread")
+        redisbench_admin.run.metrics.BENCHMARK_RUNNING_GLOBAL = False
+        cpu_stats_thread.join()
+        logging.info("CPU collecting thread stopped")
+    if len(post_commands) > 0:
+        res = execute_remote_commands(
+            client_public_ip, username, private_key, post_commands, client_ssh_port
+        )
+        recv_exit_status, _, _ = res[0]
+
+        if recv_exit_status != 0:
+            logging.error(
+                "Exit status of remote command execution {}. Printing stdout and stderr".format(
+                    recv_exit_status
+                )
+            )
+            stderr, stdout = print_commands_outputs(post_commands, True, res)
+        else:
+            logging.info(
+                "Remote process exited normally. Exit code {}. Printing stdout.".format(
+                    recv_exit_status
+                )
+            )
+            stderr, stdout = print_commands_outputs(post_commands, False, res)
+
     benchmark_duration_seconds = calculate_client_tool_duration_and_check(
         benchmark_end_time, benchmark_start_time, step_name, warn_min_duration
     )
@@ -224,15 +266,15 @@
 def setup_remote_benchmark_ann(
     client_public_ip, username, private_key, client_ssh_port
 ):
-    commands = [
-        "sudo apt install python3-pip -y",
-        "sudo pip3 install redisbench-admin>=0.7.0",
-    ]
-    # last argument (get_pty) needs to be set to true
-    # check: https://stackoverflow.com/questions/5785353/paramiko-and-sudo
-    execute_remote_commands(
-        client_public_ip, username, private_key, commands, client_ssh_port, True
-    )
+    # commands = [
+    #     "sudo apt install python3-pip -y",
+    #     "sudo pip3 install redisbench-admin>=0.7.0",
+    # ]
+    # # last argument (get_pty) needs to be set to true
+    # # check: https://stackoverflow.com/questions/5785353/paramiko-and-sudo
+    # execute_remote_commands(
+    #     client_public_ip, username, private_key, commands, client_ssh_port, True
+    # )
     pkg_path = get_ann_remote_pkg_path(
         client_public_ip, client_ssh_port, private_key, username
     )

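Splitting the artifact-packaging commands into post_commands means they now run only after the benchmark has finished and the CPU thread has been joined. The project routes them through its own execute_remote_commands helper; a rough paramiko-based sketch of the same idea — executing deferred commands over SSH and checking their exit status — is shown below (host, user, key path and commands are placeholders):

```python
import logging
import paramiko

# Placeholder deferred commands, mirroring the mkdir/zip post_commands in the diff
post_commands = ["mkdir -p /tmp/results", "cd /tmp && zip -r results.zip results/*"]

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect("client.example.com", port=22, username="ubuntu",
            key_filename="/path/to/private_key.pem")  # placeholder credentials

for cmd in post_commands:
    _, stdout, stderr = ssh.exec_command(cmd)
    exit_status = stdout.channel.recv_exit_status()  # blocks until the command finishes
    if exit_status != 0:
        logging.error("Command %r failed with exit status %d: %s",
                      cmd, exit_status, stderr.read().decode())
    else:
        logging.info("Command %r finished OK", cmd)

ssh.close()
```
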
redisbench_admin/run_remote/run_remote.py (+23, -3)

@@ -11,8 +11,11 @@
 import redis
 import pytablewriter
 from pytablewriter import MarkdownTableWriter
-
-
+import redisbench_admin.run.metrics
+from redisbench_admin.run.metrics import (
+    from_info_to_overall_shard_cpu,
+    collect_redis_metrics,
+)
 from redisbench_admin.profilers.perf_daemon_caller import (
     PerfDaemonRemoteCaller,
     PERF_DAEMON_LOGNAME,
@@ -27,7 +30,6 @@
 )
 from redisbench_admin.run.git import git_vars_crosscheck
 from redisbench_admin.run.grafana import generate_artifacts_table_grafana_redis
-from redisbench_admin.run.metrics import collect_redis_metrics
 from redisbench_admin.run.modules import redis_modules_check
 from redisbench_admin.run.redistimeseries import (
     timeseries_test_sucess_flow,
@@ -493,6 +495,8 @@ def run_remote_command_logic(args, project_name, project_version):
                        min_recommended_benchmark_duration,
                        client_ssh_port,
                        private_key,
+                        True,
+                        redis_conns,
                    )

                    if profilers_enabled:
@@ -556,6 +560,18 @@
                        )
                    )

+                    (
+                        total_shards_cpu_usage,
+                        cpu_usage_map,
+                    ) = from_info_to_overall_shard_cpu(
+                        redisbench_admin.run.metrics.BENCHMARK_CPU_STATS_GLOBAL
+                    )
+                    logging.info(
+                        "Total CPU usage ({:.3f} %)".format(
+                            total_shards_cpu_usage
+                        )
+                    )
+
                    if remote_run_result is False:
                        db_error_artifacts(
                            db_ssh_port,
@@ -594,6 +610,9 @@
                                ]
                            },
                        )
+                        overall_end_time_metrics[
+                            "total_shards_used_cpu_pct"
+                        ] = total_shards_cpu_usage
                        expire_ms = 7 * 24 * 60 * 60 * 1000
                        export_redis_metrics(
                            artifact_version,
@@ -739,6 +758,7 @@ def run_remote_command_logic(args, project_name, project_version):
                            results_dict,
                            setup_name,
                            test_name,
+                            total_shards_cpu_usage,
                        )
                        client_artifacts.append(local_bench_fname)
                        client_artifacts.extend(client_output_artifacts)

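Both runners finally pass total_shards_cpu_usage into print_results_table_stdout, which appends a "Total shards CPU usage %" row before rendering the Markdown table. A minimal sketch of how such a row renders with pytablewriter (the metric names and values are illustrative):

```python
from pytablewriter import MarkdownTableWriter

results_matrix = [
    ["Overall query rate (q/s)", "12345.678"],
    ["p50 latency (ms)", "1.234"],
    ["Total shards CPU usage %", "187.500"],  # row appended when cpu_usage is not None
]

writer = MarkdownTableWriter(
    table_name="Results for test X on oss-standalone",
    headers=["Metric Name", "Metric Value"],
    value_matrix=results_matrix,
)
writer.write_table()
```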