Skip to content

Commit a3c010d

Browse files
committed
Support using the Scheduler in the tpch benchmark
1 parent e1204a5 commit a3c010d

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

benchmarks/src/bin/tpch.rs

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,8 @@ use datafusion::datasource::file_format::csv::DEFAULT_CSV_EXTENSION;
4949
use datafusion::datasource::file_format::parquet::DEFAULT_PARQUET_EXTENSION;
5050
use datafusion::datasource::listing::ListingTableUrl;
5151
use datafusion::execution::context::SessionState;
52+
use datafusion::scheduler::Scheduler;
53+
use futures::TryStreamExt;
5254
use serde::Serialize;
5355
use structopt::StructOpt;
5456

@@ -101,6 +103,10 @@ struct DataFusionBenchmarkOpt {
101103
/// Whether to disable collection of statistics (and cost based optimizations) or not.
102104
#[structopt(short = "S", long = "disable-statistics")]
103105
disable_statistics: bool,
106+
107+
/// Enable scheduler
108+
#[structopt(short = "e", long = "enable-scheduler")]
109+
enable_scheduler: bool,
104110
}
105111

106112
#[derive(Debug, StructOpt)]
@@ -235,14 +241,16 @@ async fn benchmark_query(
235241
if query_id == 15 {
236242
for (n, query) in sql.iter().enumerate() {
237243
if n == 1 {
238-
result = execute_query(&ctx, query, opt.debug).await?;
244+
result = execute_query(&ctx, query, opt.debug, opt.enable_scheduler)
245+
.await?;
239246
} else {
240-
execute_query(&ctx, query, opt.debug).await?;
247+
execute_query(&ctx, query, opt.debug, opt.enable_scheduler).await?;
241248
}
242249
}
243250
} else {
244251
for query in sql {
245-
result = execute_query(&ctx, query, opt.debug).await?;
252+
result =
253+
execute_query(&ctx, query, opt.debug, opt.enable_scheduler).await?;
246254
}
247255
}
248256

@@ -317,6 +325,7 @@ async fn execute_query(
317325
ctx: &SessionContext,
318326
sql: &str,
319327
debug: bool,
328+
enable_scheduler: bool,
320329
) -> Result<Vec<RecordBatch>> {
321330
let plan = ctx.sql(sql).await?;
322331
let plan = plan.to_unoptimized_plan();
@@ -337,7 +346,13 @@ async fn execute_query(
337346
);
338347
}
339348
let task_ctx = ctx.task_ctx();
340-
let result = collect(physical_plan.clone(), task_ctx).await?;
349+
let result = if enable_scheduler {
350+
let scheduler = Scheduler::new(num_cpus::get());
351+
let results = scheduler.schedule(physical_plan.clone(), task_ctx).unwrap();
352+
results.stream().try_collect().await?
353+
} else {
354+
collect(physical_plan.clone(), task_ctx).await?
355+
};
341356
if debug {
342357
println!(
343358
"=== Physical plan with metrics ===\n{}\n",
@@ -813,7 +828,7 @@ mod tests {
813828

814829
let sql = &get_query_sql(n)?;
815830
for query in sql {
816-
execute_query(&ctx, query, false).await?;
831+
execute_query(&ctx, query, false, false).await?;
817832
}
818833

819834
Ok(())
@@ -841,6 +856,7 @@ mod ci {
841856
mem_table: false,
842857
output_path: None,
843858
disable_statistics: false,
859+
enable_scheduler: false,
844860
};
845861
register_tables(&opt, &ctx).await?;
846862
let queries = get_query_sql(query)?;
@@ -1153,6 +1169,7 @@ mod ci {
11531169
mem_table: false,
11541170
output_path: None,
11551171
disable_statistics: false,
1172+
enable_scheduler: false,
11561173
};
11571174
let mut results = benchmark_datafusion(opt).await?;
11581175
assert_eq!(results.len(), 1);

0 commit comments

Comments
 (0)