Skip to content

Commit e387164

Browse files
Enabled remote dataset fetching from url (#63)
* [add] Enabled specifying remote rdb links
* [add] Enabled remote dataset fetching from url
1 parent 892d105 commit e387164

File tree

6 files changed

+121
-40
lines changed

6 files changed

+121
-40
lines changed

poetry.lock

Lines changed: 12 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "redisbench-admin"
3-
version = "0.1.54"
3+
version = "0.1.55"
44
description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
55
authors = ["filipecosta90 <[email protected]>"]
66
readme = "README.md"
@@ -29,6 +29,7 @@ python_terraform = "^0.10.1"
2929
GitPython = "^3.1.12"
3030
PyYAML = "^5.4.0"
3131
numpy = "^1.15.4"
32+
wget = "^3.2"
3233

3334
[tool.poetry.dev-dependencies]
3435
pytest = "^4.6"

redisbench_admin/run_local/run_local.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import shutil
77
import subprocess
88
import sys
9+
import tempfile
910

1011
import yaml
1112

@@ -16,7 +17,7 @@
1617
from redisbench_admin.utils.local import (
1718
spinUpLocalRedis,
1819
getLocalRunFullFilename,
19-
isProcessAlive, )
20+
isProcessAlive, checkDatasetLocalRequirements, )
2021
from redisbench_admin.utils.remote import (
2122
extract_git_vars,
2223
validateResultExpectations,
@@ -85,11 +86,22 @@ def run_local_command_logic(args):
8586
# after we've spinned Redis, even on error we should always teardown
8687
# in case of some unexpected error we fail the test
8788
try:
89+
dirname = ".",
8890
# setup Redis
91+
# copy the rdb to DB machine
92+
dataset = None
93+
temporary_dir = tempfile.mkdtemp()
94+
logging.info(
95+
"Using local temporary dir to spin up Redis Instance. Path: {}".format(
96+
temporary_dir
97+
)
98+
)
99+
checkDatasetLocalRequirements(benchmark_config, temporary_dir, dirname)
100+
89101
redis_process = spinUpLocalRedis(
90-
benchmark_config,
102+
temporary_dir,
91103
args.port,
92-
local_module_file, dirname,
104+
local_module_file
93105
)
94106
if isProcessAlive(redis_process) is False:
95107
raise Exception("Redis process is not alive. Failing test.")

redisbench_admin/utils/local.py

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,39 @@
44
import tempfile
55
import time
66
from shutil import copyfile
7-
7+
import wget
88
import redis
99

1010

11-
def checkDatasetLocalRequirements(benchmark_config, redis_tmp_dir, dirname="."):
12-
for k in benchmark_config["dbconfig"]:
13-
if "dataset" in k:
14-
dataset = k["dataset"]
15-
if dataset is not None:
16-
logging.info("Copying rdb {}/{} to {}/dump.rdb".format(dirname, dataset, redis_tmp_dir))
17-
copyfile("{}/{}".format(dirname, dataset), "{}/dump.rdb".format(redis_tmp_dir))
11+
def checkDatasetLocalRequirements(benchmark_config, redis_dbdir, dirname=None, datasets_localtemp_dir="./datasets",dbconfig_keyname="dbconfig"):
12+
dataset = None
13+
full_path = None
14+
tmp_path = None
15+
if dbconfig_keyname in benchmark_config:
16+
for k in benchmark_config[dbconfig_keyname]:
17+
if "dataset" in k:
18+
dataset = k["dataset"]
19+
if dataset is not None:
20+
if dataset.startswith("http"):
21+
if not os.path.isdir(datasets_localtemp_dir):
22+
os.mkdir(datasets_localtemp_dir)
23+
filename = dataset.split("/")[-1]
24+
full_path = '{}/{}'.format(datasets_localtemp_dir, filename)
25+
if not os.path.exists(full_path):
26+
logging.info("Retrieving remote file from {} to {}. Using the dir {} as a cache for next time.".format(dataset,full_path, datasets_localtemp_dir))
27+
wget.download(dataset,full_path)
28+
else:
29+
logging.info(
30+
"Reusing cached remote file (located at {} ).".format(
31+
full_path))
32+
else:
33+
full_path = dataset
34+
if dirname is not None:
35+
full_path = "{}/{}".format(dirname,full_path)
36+
logging.info("Copying rdb from {} to {}/dump.rdb".format(full_path, redis_dbdir))
37+
tmp_path = "{}/dump.rdb".format(redis_dbdir)
38+
copyfile(full_path,tmp_path )
39+
return dataset,full_path,tmp_path
1840

1941

2042
def waitForConn(conn, retries=20, command="PING", shouldBe=True):
@@ -40,21 +62,25 @@ def waitForConn(conn, retries=20, command="PING", shouldBe=True):
4062

4163

4264
def spinUpLocalRedis(
43-
benchmark_config,
65+
dbdir,
4466
port,
4567
local_module_file,
46-
dirname=".",
4768
):
48-
# copy the rdb to DB machine
49-
dataset = None
50-
temporary_dir = tempfile.mkdtemp()
69+
command = generateStandaloneRedisServerArgs(dbdir, local_module_file, port)
70+
5171
logging.info(
52-
"Using local temporary dir to spin up Redis Instance. Path: {}".format(
53-
temporary_dir
72+
"Running local redis-server with the following args: {}".format(
73+
" ".join(command)
5474
)
5575
)
56-
checkDatasetLocalRequirements(benchmark_config, temporary_dir, dirname)
76+
redis_process = subprocess.Popen(command)
77+
result = waitForConn(redis.StrictRedis())
78+
if result is True:
79+
logging.info("Redis available")
80+
return redis_process
5781

82+
83+
def generateStandaloneRedisServerArgs(dbdir, local_module_file, port):
5884
# start redis-server
5985
command = [
6086
"redis-server",
@@ -63,20 +89,12 @@ def spinUpLocalRedis(
6389
"--port",
6490
"{}".format(port),
6591
"--dir",
66-
temporary_dir,
67-
"--loadmodule",
68-
os.path.abspath(local_module_file),
69-
]
70-
logging.info(
71-
"Running local redis-server with the following args: {}".format(
72-
" ".join(command)
73-
)
74-
)
75-
redis_process = subprocess.Popen(command)
76-
result = waitForConn(redis.StrictRedis())
77-
if result is True:
78-
logging.info("Redis available")
79-
return redis_process
92+
dbdir]
93+
if local_module_file is not None:
94+
command.extend(["--loadmodule",
95+
os.path.abspath(local_module_file),
96+
])
97+
return command
8098

8199

82100
def isProcessAlive(process):

redisbench_admin/utils/remote.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from python_terraform import Terraform
1515
from tqdm import tqdm
1616

17+
from redisbench_admin.utils.local import checkDatasetLocalRequirements
18+
1719

1820
def get_git_root(path):
1921
git_repo = git.Repo(path, search_parent_directories=True)
@@ -105,18 +107,16 @@ def checkDatasetRemoteRequirements(
105107
benchmark_config, server_public_ip, username, private_key, remote_dataset_file, dirname
106108
):
107109
res = True
108-
for k in benchmark_config["dbconfig"]:
109-
if "dataset" in k:
110-
dataset = k["dataset"]
110+
dataset, fullpath, tmppath = checkDatasetLocalRequirements(benchmark_config, ".", dirname)
111111
if dataset is not None:
112112
logging.info('Detected dataset config. Will copy file to remote setup... "{}"'.format(dataset))
113113
res = copyFileToRemoteSetup(
114114
server_public_ip,
115115
username,
116116
private_key,
117-
dataset,
117+
fullpath,
118118
remote_dataset_file,
119-
dirname,
119+
None,
120120
)
121121
return res
122122

tests/test_local.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import os
2+
import shutil
3+
import tempfile
4+
5+
from redisbench_admin.utils.local import checkDatasetLocalRequirements
6+
7+
8+
def test_check_dataset_local_requirements():
9+
url = "https://s3.amazonaws.com/benchmarks.redislabs/redistimeseries/tsbs/datasets/devops/functional/scale-100-redistimeseries_data.rdb"
10+
# no db config
11+
checkDatasetLocalRequirements({}, ".", ".")
12+
# dbconfig with no dataset key
13+
checkDatasetLocalRequirements({"dbconfig": {}}, ".", ".")
14+
15+
# dbconfig with local filename
16+
directory_from = tempfile.mkdtemp()
17+
directory_to = tempfile.mkdtemp()
18+
temp_file1 = tempfile.NamedTemporaryFile(dir=directory_from)
19+
checkDatasetLocalRequirements({"dbconfig": [{"dataset": temp_file1.name}]}, directory_to.__str__(), None)
20+
assert os.path.exists("{}/{}".format(directory_from.__str__(), temp_file1.name.split("/")[-1]))
21+
assert os.path.exists("{}/{}".format(directory_to.__str__(), "dump.rdb"))
22+
shutil.rmtree(directory_from)
23+
shutil.rmtree(directory_to)
24+
25+
# dbconfig with remote filename
26+
tests_remote_tmp_datasets = "./tests/temp-datasets"
27+
if os.path.isdir(tests_remote_tmp_datasets):
28+
shutil.rmtree(tests_remote_tmp_datasets)
29+
30+
directory_to = tempfile.mkdtemp()
31+
checkDatasetLocalRequirements({"dbconfig": [{"dataset": url}]}, directory_to.__str__(), None, tests_remote_tmp_datasets)
32+
assert os.path.exists("{}/{}".format(directory_to.__str__(), "dump.rdb"))
33+
assert os.path.exists("{}/{}".format(tests_remote_tmp_datasets, "scale-100-redistimeseries_data.rdb"))
34+
checkDatasetLocalRequirements({"dbconfig": [{"dataset": url}]}, directory_to.__str__(), None, tests_remote_tmp_datasets)
35+
assert os.path.exists("{}/{}".format(directory_to.__str__(), "dump.rdb"))
36+
shutil.rmtree(directory_to)
37+
if os.path.isdir(tests_remote_tmp_datasets):
38+
shutil.rmtree(tests_remote_tmp_datasets)
39+

0 commit comments

Comments
 (0)