
Commit b2ec03a

standardize compare tool to allow for oss data related comparisons (#322)
* Remote result post process fix
* Remote result post process fix
* compare OSS WIP
* Bumping version from 0.7.16 to 0.7.17
* Fixed flake8 error
1 parent 62d3adb commit b2ec03a

File tree

3 files changed (+71 -37 lines)

pyproject.toml

+1 -1
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "redisbench-admin"
-version = "0.7.16"
+version = "0.7.17"
 description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
 authors = ["filipecosta90 <[email protected]>","Redis Performance Group <[email protected]>"]
 readme = "README.md"

redisbench_admin/compare/args.py

+3
@@ -52,6 +52,9 @@ def create_compare_arguments(parser):
     parser.add_argument("--comparison-branch", type=str, default=None, required=False)
     parser.add_argument("--comparison-tag", type=str, default=None, required=False)
     parser.add_argument("--print-regressions-only", type=bool, default=False)
+    parser.add_argument("--verbose", type=bool, default=False)
+    parser.add_argument("--use_metric_context_path", type=bool, default=False)
+    parser.add_argument("--testname_regex", type=str, default=".*", required=False)
     parser.add_argument(
         "--regressions-percent-lower-limit",
         type=float,
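
A caveat worth knowing when passing the new boolean flags: argparse's type=bool simply calls bool() on the raw string, so any non-empty value, including the literal "False", parses as True. A minimal sketch demonstrating this (the add_argument lines mirror the diff above):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--verbose", type=bool, default=False)
parser.add_argument("--use_metric_context_path", type=bool, default=False)
parser.add_argument("--testname_regex", type=str, default=".*", required=False)

args = parser.parse_args(["--verbose", "False"])
print(args.verbose)  # True -- bool("False") is truthy

args = parser.parse_args([])
print(args.verbose)  # False -- only omitting the flag keeps the default

In practice these flags are therefore enabled by passing any value at all, e.g. --verbose 1.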

redisbench_admin/compare/compare.py

+67 -36
@@ -4,18 +4,17 @@
 # All rights reserved.
 #
 import logging
+import re
+
 import pandas as pd
 import redis
 from pytablewriter import MarkdownTableWriter
 import humanize
 import datetime as dt
-
+from tqdm import tqdm
 from redisbench_admin.utils.remote import get_overall_dashboard_keynames
 
 
-from redisbench_admin.utils.utils import get_ts_metric_name
-
-
 def compare_command_logic(args, project_name, project_version):
     logging.info(
         "Using: {project_name} {project_version}".format(
@@ -99,29 +98,46 @@ def compare_command_logic(args, project_name, project_version):
         _,
         _,
         _,
-        _,
+        testcases_metric_context_path_setname,
         _,
         _,
         _,
         _,
         _,
     ) = get_overall_dashboard_keynames(tf_github_org, tf_github_repo, tf_triggering_env)
     test_names = []
+    used_key = testcases_setname
+    test_filter = "test_name"
+
+    if args.use_metric_context_path:
+        test_filter = "test_name:metric_context_path"
+        used_key = testcases_metric_context_path_setname
+
+    tags_regex_string = re.compile(args.testname_regex)
+
     try:
-        test_names = rts.smembers(testcases_setname)
+        test_names = rts.smembers(used_key)
         test_names = list(test_names)
         test_names.sort()
+        final_test_names = []
+        for test_name in test_names:
+            test_name = test_name.decode()
+            match_obj = re.search(tags_regex_string, test_name)
+            if match_obj is not None:
+                final_test_names.append(test_name)
+        test_names = final_test_names
+
     except redis.exceptions.ResponseError as e:
         logging.warning(
             "Error while trying to fetch test cases set (key={}) {}. ".format(
-                testcases_setname, e.__str__()
+                used_key, e.__str__()
             )
         )
         pass
 
     logging.warning(
-        "Based on test-cases set (key={}) we have {} distinct benchmarks. ".format(
-            testcases_setname, len(test_names)
+        "Based on test-cases set (key={}) we have {} comparison points. ".format(
+            used_key, len(test_names)
         )
     )
     profilers_artifacts_matrix = []
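
To make the new filtering concrete, here is the test-name selection logic from the hunk above in isolation, run against made-up set members (the byte strings stand in for what rts.smembers returns; the pattern is a hypothetical --testname_regex value):

import re

test_names = [b"memtier-100Kkeys-string-get", b"memtier-100Kkeys-string-set", b"tsbs-devops-cpu-max"]
tags_regex_string = re.compile("memtier.*")  # hypothetical --testname_regex value

final_test_names = []
for test_name in test_names:
    test_name = test_name.decode()
    # re.search accepts a precompiled pattern and matches anywhere in the name
    if re.search(tags_regex_string, test_name) is not None:
        final_test_names.append(test_name)

print(final_test_names)  # ['memtier-100Kkeys-string-get', 'memtier-100Kkeys-string-set']

With the default ".*" every member passes, so existing callers see no change in behavior.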
@@ -131,31 +147,42 @@
     total_unstable = 0
     total_regressions = 0
     noise_waterline = 2.5
+    progress = tqdm(unit="benchmark time-series", total=len(test_names))
     for test_name in test_names:
+        filters_baseline = [
+            "{}={}".format(by_str, baseline_str),
+            "metric={}".format(metric_name),
+            "{}={}".format(test_filter, test_name),
+            "deployment_type={}".format(deployment_type),
+            "deployment_name={}".format(deployment_name),
+        ]
+        filters_comparison = [
+            "{}={}".format(by_str, comparison_str),
+            "metric={}".format(metric_name),
+            "{}={}".format(test_filter, test_name),
+            "deployment_type={}".format(deployment_type),
+            "deployment_name={}".format(deployment_name),
+        ]
+        baseline_timeseries = rts.ts().queryindex(filters_baseline)
+        comparison_timeseries = rts.ts().queryindex(filters_comparison)
+        progress.update()
+        if len(baseline_timeseries) != 1:
+            if args.verbose:
+                logging.warning(
+                    "Baseline timeseries {}".format(len(baseline_timeseries))
+                )
+            continue
+        else:
+            ts_name_baseline = baseline_timeseries[0]
+        if len(comparison_timeseries) != 1:
+            if args.verbose:
+                logging.warning(
+                    "Comparison timeseries {}".format(len(comparison_timeseries))
+                )
+            continue
+        else:
+            ts_name_comparison = comparison_timeseries[0]
 
-        test_name = test_name.decode()
-        ts_name_baseline = get_ts_metric_name(
-            "by.{}".format(by_str),
-            baseline_str,
-            tf_github_org,
-            tf_github_repo,
-            deployment_name,
-            deployment_type,
-            test_name,
-            tf_triggering_env,
-            metric_name,
-        )
-        ts_name_comparison = get_ts_metric_name(
-            "by.{}".format(by_str),
-            comparison_str,
-            tf_github_org,
-            tf_github_repo,
-            deployment_name,
-            deployment_type,
-            test_name,
-            tf_triggering_env,
-            metric_name,
-        )
         baseline_v = "N/A"
         comparison_v = "N/A"
         baseline_nsamples = 0
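
The substantive change in this hunk is discovery: rather than composing a time-series key name with get_ts_metric_name, the command now asks RedisTimeSeries for matching series via its secondary index (TS.QUERYINDEX), which is what makes OSS-produced data comparable as long as it carries the same labels. A minimal sketch of that lookup, assuming a local Redis with the RedisTimeSeries module loaded and illustrative label values:

import redis

rts = redis.Redis(host="localhost", port=6379)

# Each entry is a label=value filter; a series must match all of them.
filters_baseline = [
    "branch=master",                          # "{}={}".format(by_str, baseline_str)
    "metric=Ops/sec",                         # illustrative metric name
    "test_name=memtier-100Kkeys-string-get",  # or test_name:metric_context_path=...
    "deployment_type=oss-standalone",         # illustrative deployment labels
    "deployment_name=oss-standalone",
]

# TS.QUERYINDEX returns the key names of every series matching the filters.
baseline_timeseries = rts.ts().queryindex(filters_baseline)
if len(baseline_timeseries) == 1:
    ts_name_baseline = baseline_timeseries[0]

Requiring exactly one match keeps each comparison unambiguous; anything else is skipped, and logged when --verbose is set.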
@@ -223,14 +250,14 @@ def compare_command_logic(args, project_name, project_version):
             unstable = True
             if baseline_pct_change > 10.0:
                 stamp_b = "UNSTABLE"
-            baseline_v_str = " {:.3f} +- {:.1f}% {}".format(
-                baseline_v, baseline_pct_change, stamp_b
+            baseline_v_str = " {:.0f} +- {:.1f}% {} ({} datapoints)".format(
+                baseline_v, baseline_pct_change, stamp_b, baseline_nsamples
             )
             stamp_c = ""
             if comparison_pct_change > 10.0:
                 stamp_c = "UNSTABLE"
-            comparison_v_str = " {:.3f} +- {:.1f}% {}".format(
-                comparison_v, comparison_pct_change, stamp_c
+            comparison_v_str = " {:.0f} +- {:.1f}% {} ({} datapoints)".format(
+                comparison_v, comparison_pct_change, stamp_c, comparison_nsamples
             )
             if metric_mode == "higher-better":
                 percentage_change = (
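
The string-format change is easiest to see with concrete numbers; given illustrative values, the old and new renderings of an unstable baseline cell:

baseline_v = 123456.789
baseline_pct_change = 12.3
stamp_b = "UNSTABLE"
baseline_nsamples = 30

old_str = " {:.3f} +- {:.1f}% {}".format(baseline_v, baseline_pct_change, stamp_b)
new_str = " {:.0f} +- {:.1f}% {} ({} datapoints)".format(
    baseline_v, baseline_pct_change, stamp_b, baseline_nsamples
)
print(old_str)  #  123456.789 +- 12.3% UNSTABLE
print(new_str)  #  123457 +- 12.3% UNSTABLE (30 datapoints)

Dropping the decimals makes room for the sample count, which helps when judging whether an UNSTABLE stamp reflects genuine noise or simply too few datapoints.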
@@ -252,6 +279,8 @@
                     note = note + " REGRESSION"
                 elif percentage_change < -noise_waterline:
                     note = note + " potential REGRESSION"
+                else:
+                    note = note + " -- no change --"
                 detected_regressions.append(test_name)
             if percentage_change > 0.0 and not unstable:
                 if percentage_change > waterline:
@@ -260,6 +289,8 @@
                     note = note + " IMPROVEMENT"
                 elif percentage_change > noise_waterline:
                     note = note + " potential IMPROVEMENT"
+                else:
+                    note = note + " -- no change --"
 
             if (
                 detected_improvement is False
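
Taken together with the existing thresholds, the note-building now covers the full range: beyond the waterline is a firm call, beyond the fixed noise_waterline of 2.5% is a potential one, and, new in this commit, anything closer to zero is explicitly marked as no change. A condensed, hypothetical restatement of that ladder for the higher-is-better case (the waterline default of 5.0 is illustrative; in the tool it comes from arguments such as --regressions-percent-lower-limit shown in args.py):

def classify(percentage_change, waterline=5.0, noise_waterline=2.5):
    # Negative change on a higher-is-better metric reads as a regression.
    if percentage_change < 0.0:
        if percentage_change < -waterline:
            return "REGRESSION"
        elif percentage_change < -noise_waterline:
            return "potential REGRESSION"
        return "-- no change --"
    if percentage_change > waterline:
        return "IMPROVEMENT"
    elif percentage_change > noise_waterline:
        return "potential IMPROVEMENT"
    return "-- no change --"

print(classify(-7.0))  # REGRESSION
print(classify(-3.0))  # potential REGRESSION
print(classify(1.0))   # -- no change --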
