Skip to content

Commit f8da884

Browse files
committed
Morsel-driven Parallelism using rayon (apache#2199)
1 parent 8058fbb commit f8da884

File tree

11 files changed

+1199
-29
lines changed

11 files changed

+1199
-29
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ The parquet SQL benchmarks can be run with
150150
cargo bench --bench parquet_query_sql
151151
```
152152

153-
These randomly generate a parquet file, and then benchmark queries sourced from [parquet_query_sql.sql](./datafusion/benches/parquet_query_sql.sql) against it. This can therefore be a quick way to add coverage of particular query and/or data paths.
153+
These randomly generate a parquet file, and then benchmark queries sourced from [parquet_query_sql.sql](./datafusion/scheduler/benches/parquet_query_sql.sql) against it. This can therefore be a quick way to add coverage of particular query and/or data paths.
154154

155155
If the environment variable `PARQUET_FILE` is set, the benchmark will run queries against this file instead of a randomly generated one. This can be useful for performing multiple runs, potentially with different code, against the same source data, or for testing against a custom dataset.
156156

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,13 @@
1717

1818
[workspace]
1919
members = [
20-
"datafusion/core",
2120
"datafusion/common",
21+
"datafusion/core",
2222
"datafusion/expr",
2323
"datafusion/jit",
2424
"datafusion/physical-expr",
2525
"datafusion/proto",
26+
"datafusion/scheduler",
2627
"datafusion-examples",
2728
"benchmarks",
2829
"ballista/rust/client",

datafusion/core/Cargo.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,6 @@ name = "scalar"
117117
harness = false
118118
name = "physical_plan"
119119

120-
[[bench]]
121-
harness = false
122-
name = "parquet_query_sql"
123-
124120
[[bench]]
125121
harness = false
126122
name = "jit"

datafusion/core/src/datasource/memory.rs

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -71,25 +71,15 @@ impl MemTable {
7171
let exec = t.scan(&None, &[], None).await?;
7272
let partition_count = exec.output_partitioning().partition_count();
7373

74-
let tasks = (0..partition_count)
75-
.map(|part_i| {
76-
let context1 = context.clone();
77-
let exec = exec.clone();
78-
tokio::spawn(async move {
79-
let stream = exec.execute(part_i, context1.clone()).await?;
80-
common::collect(stream).await
81-
})
82-
})
83-
// this collect *is needed* so that the join below can
84-
// switch between tasks
85-
.collect::<Vec<_>>();
86-
87-
let mut data: Vec<Vec<RecordBatch>> =
88-
Vec::with_capacity(exec.output_partitioning().partition_count());
89-
for task in tasks {
90-
let result = task.await.expect("MemTable::load could not join task")?;
91-
data.push(result);
92-
}
74+
let data = futures::future::try_join_all((0..partition_count).map(|part_i| {
75+
let context1 = context.clone();
76+
let exec = exec.clone();
77+
async move {
78+
let stream = exec.execute(part_i, context1.clone()).await?;
79+
common::collect(stream).await
80+
}
81+
}))
82+
.await?;
9383

9484
let exec = MemoryExec::try_new(&data, schema.clone(), None)?;
9585

datafusion/scheduler/Cargo.toml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "datafusion-scheduler"
20+
description = "Scheduling for DataFusion query engine"
21+
version = "7.0.0"
22+
homepage = "https://github.com/apache/arrow-datafusion"
23+
repository = "https://github.com/apache/arrow-datafusion"
24+
readme = "../README.md"
25+
authors = ["Apache Arrow <dev@arrow.apache.org>"]
26+
license = "Apache-2.0"
27+
keywords = ["arrow", "query", "sql"]
28+
edition = "2021"
29+
rust-version = "1.58"
30+
31+
[lib]
32+
name = "datafusion_scheduler"
33+
path = "src/lib.rs"
34+
35+
[features]
36+
37+
[dependencies]
38+
ahash = { version = "0.7", default-features = false }
39+
arrow = { version = "11" }
40+
async-trait = "0.1"
41+
datafusion = { path = "../core", version = "7.0.0" }
42+
futures = "0.3"
43+
log = "0.4"
44+
parking_lot = "0.12"
45+
rayon = "1.5"
46+
47+
[dev-dependencies]
48+
criterion = "0.3"
49+
rand = "0.8"
50+
tokio = { version = "1.0", features = ["macros", "rt"] }
51+
parquet = "11.0"
52+
tempfile = "3"
53+
54+
[[bench]]
55+
harness = false
56+
name = "parquet_query_sql"

datafusion/core/benches/parquet_query_sql.rs renamed to datafusion/scheduler/benches/parquet_query_sql.rs

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,13 @@ use arrow::datatypes::{
2424
};
2525
use arrow::record_batch::RecordBatch;
2626
use criterion::{criterion_group, criterion_main, Criterion};
27+
<<<<<<<< HEAD:datafusion/core/benches/parquet_query_sql.rs
2728
use datafusion::prelude::{ParquetReadOptions, SessionContext};
29+
========
30+
use datafusion::prelude::{SessionConfig, SessionContext};
31+
use datafusion_scheduler::Scheduler;
32+
use futures::stream::StreamExt;
33+
>>>>>>>> scheduler-experiments:datafusion-scheduler/benches/parquet_query_sql.rs
2834
use parquet::arrow::ArrowWriter;
2935
use parquet::file::properties::{WriterProperties, WriterVersion};
3036
use rand::distributions::uniform::SampleUniform;
@@ -37,7 +43,6 @@ use std::path::Path;
3743
use std::sync::Arc;
3844
use std::time::Instant;
3945
use tempfile::NamedTempFile;
40-
use tokio_stream::StreamExt;
4146

4247
/// The number of batches to write
4348
const NUM_BATCHES: usize = 2048;
@@ -193,6 +198,7 @@ fn criterion_benchmark(c: &mut Criterion) {
193198
assert!(Path::new(&file_path).exists(), "path not found");
194199
println!("Using parquet file {}", file_path);
195200

201+
<<<<<<<< HEAD:datafusion/core/benches/parquet_query_sql.rs
196202
let context = SessionContext::new();
197203

198204
let rt = tokio::runtime::Builder::new_multi_thread().build().unwrap();
@@ -202,6 +208,26 @@ fn criterion_benchmark(c: &mut Criterion) {
202208
ParquetReadOptions::default(),
203209
))
204210
.unwrap();
211+
========
212+
let partitions = 4;
213+
let config = SessionConfig::new().with_target_partitions(partitions);
214+
let mut context = SessionContext::with_config(config);
215+
216+
let scheduler = Scheduler::new(partitions);
217+
218+
let local_rt = tokio::runtime::Builder::new_current_thread()
219+
.build()
220+
.unwrap();
221+
222+
let query_rt = tokio::runtime::Builder::new_multi_thread()
223+
.worker_threads(partitions)
224+
.build()
225+
.unwrap();
226+
227+
local_rt
228+
.block_on(context.register_parquet("t", file_path.as_str()))
229+
.unwrap();
230+
>>>>>>>> scheduler-experiments:datafusion-scheduler/benches/parquet_query_sql.rs
205231

206232
// We read the queries from a file so they can be changed without recompiling the benchmark
207233
let mut queries_file = File::open("benches/parquet_query_sql.sql").unwrap();
@@ -220,17 +246,48 @@ fn criterion_benchmark(c: &mut Criterion) {
220246
continue;
221247
}
222248

223-
let query = query.as_str();
224-
c.bench_function(query, |b| {
249+
c.bench_function(&format!("tokio: {}", query), |b| {
225250
b.iter(|| {
251+
<<<<<<<< HEAD:datafusion/core/benches/parquet_query_sql.rs
226252
let context = context.clone();
227253
rt.block_on(async move {
228254
let query = context.sql(query).await.unwrap();
255+
========
256+
let query = query.clone();
257+
let mut context = context.clone();
258+
let (sender, mut receiver) = futures::channel::mpsc::unbounded();
259+
260+
// Spawn work to a separate tokio thread pool
261+
query_rt.spawn(async move {
262+
let query = context.sql(&query).await.unwrap();
263+
>>>>>>>> scheduler-experiments:datafusion-scheduler/benches/parquet_query_sql.rs
229264
let mut stream = query.execute_stream().await.unwrap();
230-
while criterion::black_box(stream.next().await).is_some() {}
265+
266+
while let Some(next) = stream.next().await {
267+
sender.unbounded_send(next).unwrap();
268+
}
269+
});
270+
271+
local_rt.block_on(async {
272+
while receiver.next().await.transpose().unwrap().is_some() {}
231273
})
232274
});
233275
});
276+
277+
c.bench_function(&format!("scheduled: {}", query), |b| {
278+
b.iter(|| {
279+
let query = query.clone();
280+
let mut context = context.clone();
281+
282+
local_rt.block_on(async {
283+
let query = context.sql(&query).await.unwrap();
284+
let plan = query.create_physical_plan().await.unwrap();
285+
let mut stream =
286+
scheduler.schedule_plan(plan, context.task_ctx()).unwrap();
287+
while stream.next().await.transpose().unwrap().is_some() {}
288+
});
289+
});
290+
});
234291
}
235292

236293
// Temporary file must outlive the benchmarks, it is deleted when dropped

0 commit comments

Comments
 (0)