File tree 2 files changed +23
-4
lines changed 2 files changed +23
-4
lines changed Original file line number Diff line number Diff line change @@ -46,6 +46,7 @@ script. Usage instructions can be found with:
46
46
47
47
``` shell
48
48
# show usage
49
+ cd ./benchmarks/
49
50
./bench.sh
50
51
```
51
52
@@ -64,9 +65,24 @@ Create / download a specific dataset (TPCH)
64
65
``` shell
65
66
./bench.sh data tpch
66
67
```
67
-
68
68
Data is placed in the ` data ` subdirectory.
69
69
70
+ ## Running benchmarks
71
+
72
+ Run the benchmark for the TPC-H dataset:
73
+ ``` shell
74
+ ./bench.sh run tpch
75
+ ```
76
+ or for the TPC-H dataset at scale factor 10:
77
+ ``` shell
78
+ ./bench.sh run tpch10
79
+ ```
80
+
81
+ To run a specific query, for example Q21:
82
+ ``` shell
83
+ ./bench.sh run tpch10 21
84
+ ```
85
+
70
86
## Select join algorithm
71
87
The benchmark runs with ` prefer_hash_join == true ` by default, which enforces the HASH join algorithm.
72
88
To run TPCH benchmarks with a join algorithm other than HASH:
Original file line number Diff line number Diff line change @@ -43,7 +43,7 @@ usage() {
43
43
Orchestrates running benchmarks against DataFusion checkouts
44
44
45
45
Usage:
46
46
$0 data [benchmark]
47
- $0 run [benchmark]
47
+ $0 run [benchmark] [query]
48
48
$0 compare <branch1> <branch2>
49
49
$0 venv
@@ -410,7 +410,9 @@ run_tpch() {
410
410
RESULTS_FILE=" ${RESULTS_DIR} /tpch_sf${SCALE_FACTOR} .json"
411
411
echo " RESULTS_FILE: ${RESULTS_FILE} "
412
412
echo " Running tpch benchmark..."
413
- $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " --format parquet -o " ${RESULTS_FILE} "
413
+ # Optional query filter: when a query number is given, run only that query
414
+ QUERY=$( [ -n " $ARG3 " ] && echo " --query $ARG3 " || echo " " )
415
+ $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " --format parquet -o " ${RESULTS_FILE} " $QUERY
414
416
}
415
417
416
418
# Runs the tpch in memory
@@ -425,8 +427,9 @@ run_tpch_mem() {
425
427
RESULTS_FILE=" ${RESULTS_DIR} /tpch_mem_sf${SCALE_FACTOR} .json"
426
428
echo " RESULTS_FILE: ${RESULTS_FILE} "
427
429
echo " Running tpch_mem benchmark..."
430
+ QUERY=$( [ -n " $ARG3 " ] && echo " --query $ARG3 " || echo " " )
428
431
# -m means in memory
429
- $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " -m --format parquet -o " ${RESULTS_FILE} "
432
+ $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " -m --format parquet -o " ${RESULTS_FILE} " $QUERY
430
433
}
431
434
432
435
# Runs the cancellation benchmark
You can’t perform that action at this time.
0 commit comments