Skip to content

Commit 0f7b1c5

Browse files
authored
minor: Allow to run TPCH bench for a specific query (#15467)
* minor: Allow to run TPCH bench for a specific query
1 parent fa452e6 commit 0f7b1c5

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

benchmarks/README.md

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ script. Usage instructions can be found with:
4646

4747
```shell
4848
# show usage
49+
cd ./benchmarks/
4950
./bench.sh
5051
```
5152

@@ -64,9 +65,24 @@ Create / download a specific dataset (TPCH)
6465
```shell
6566
./bench.sh data tpch
6667
```
67-
6868
Data is placed in the `data` subdirectory.
6969

70+
## Running benchmarks
71+
72+
Run the benchmark for the TPC-H dataset
73+
```shell
74+
./bench.sh run tpch
75+
```
76+
or for the TPC-H dataset at scale factor 10
77+
```shell
78+
./bench.sh run tpch10
79+
```
80+
81+
To run a specific query, for example Q21
82+
```shell
83+
./bench.sh run tpch10 21
84+
```
85+
7086
## Select join algorithm
7187
The benchmark runs with `prefer_hash_join == true` by default, which enforces the HASH join algorithm.
7288
To run TPCH benchmarks with a join algorithm other than HASH:

benchmarks/bench.sh

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ usage() {
4343
Orchestrates running benchmarks against DataFusion checkouts
4444
4545
Usage:
46-
$0 data [benchmark]
46+
$0 data [benchmark]
4747
$0 run [benchmark] [query]
4848
$0 compare <branch1> <branch2>
4949
$0 venv
@@ -410,7 +410,9 @@ run_tpch() {
410410
RESULTS_FILE="${RESULTS_DIR}/tpch_sf${SCALE_FACTOR}.json"
411411
echo "RESULTS_FILE: ${RESULTS_FILE}"
412412
echo "Running tpch benchmark..."
413-
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}"
413+
# Optional query filter: if a third argument was given, pass it as --query so only that query runs
414+
QUERY=$([ -n "$ARG3" ] && echo "--query $ARG3" || echo "")
415+
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}" $QUERY
414416
}
415417

416418
# Runs the tpch in memory
@@ -425,8 +427,9 @@ run_tpch_mem() {
425427
RESULTS_FILE="${RESULTS_DIR}/tpch_mem_sf${SCALE_FACTOR}.json"
426428
echo "RESULTS_FILE: ${RESULTS_FILE}"
427429
echo "Running tpch_mem benchmark..."
430+
QUERY=$([ -n "$ARG3" ] && echo "--query $ARG3" || echo "")
428431
# -m means in memory
429-
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}"
432+
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" $QUERY
430433
}
431434

432435
# Runs the cancellation benchmark

0 commit comments

Comments
 (0)