Skip to content

feat: add more components to the wasm-pack compatible list #8843

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ path = "src/lib.rs"
# Used to enable the avro format
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
backtrace = ["datafusion-common/backtrace"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
crypto_expressions = ["datafusion-physical-expr/crypto_expressions", "datafusion-optimizer/crypto_expressions"]
default = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"]
encoding_expressions = ["datafusion-physical-expr/encoding_expressions"]
Expand Down Expand Up @@ -87,8 +87,8 @@ pin-project-lite = "^0.2.7"
rand = { workspace = true }
sqlparser = { workspace = true }
tempfile = { workspace = true }
tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
tokio-util = { version = "0.7.4", features = ["io"] }
tokio = { version = "1.28", features = ["macros", "rt", "sync"] }
tokio-util = { version = "0.7.4", features = ["io"], optional = true }
url = { workspace = true }
uuid = { version = "1.0", features = ["v4"] }
xz2 = { version = "0.1", optional = true }
Expand All @@ -113,6 +113,7 @@ rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
serde_json = { workspace = true }
test-utils = { path = "../../test-utils" }
thiserror = { workspace = true }
tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
tokio-postgres = "0.7.7"
[target.'cfg(not(target_os = "windows"))'.dev-dependencies]
nix = { version = "0.27.1", features = ["fs"] }
Expand Down
4 changes: 4 additions & 0 deletions datafusion/core/src/datasource/listing/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,14 @@ impl ListingTableUrl {
let s = s.as_ref();

// This is necessary to handle the case of a path starting with a drive letter
#[cfg(not(target_arch = "wasm32"))]
if std::path::Path::new(s).is_absolute() {
return Self::parse_path(s);
}

match Url::parse(s) {
Ok(url) => Self::try_new(url, None),
#[cfg(not(target_arch = "wasm32"))]
Err(url::ParseError::RelativeUrlWithoutBase) => Self::parse_path(s),
Err(e) => Err(DataFusionError::External(Box::new(e))),
}
Expand Down Expand Up @@ -146,6 +148,7 @@ impl ListingTableUrl {
}

/// Creates a new [`ListingTableUrl`] interpreting `s` as a filesystem path
#[cfg(not(target_arch = "wasm32"))]
fn parse_path(s: &str) -> Result<Self> {
let (path, glob) = match split_glob_expression(s) {
Some((prefix, glob)) => {
Expand Down Expand Up @@ -282,6 +285,7 @@ impl ListingTableUrl {
}

/// Creates a file URL from a potentially relative filesystem path
#[cfg(not(target_arch = "wasm32"))]
fn url_from_filesystem_path(s: &str) -> Option<Url> {
let path = std::path::Path::new(s);
let is_dir = match path.exists() {
Expand Down
11 changes: 10 additions & 1 deletion datafusion/execution/src/object_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

use dashmap::DashMap;
use datafusion_common::{exec_err, DataFusionError, Result};
#[cfg(not(target_arch = "wasm32"))]
use object_store::local::LocalFileSystem;
use object_store::ObjectStore;
use std::sync::Arc;
Expand Down Expand Up @@ -169,16 +170,24 @@ impl Default for DefaultObjectStoreRegistry {

impl DefaultObjectStoreRegistry {
    /// Creates a registry with a [`LocalFileSystem`] registered to handle
    /// `file://` paths.
    ///
    /// This constructor is only compiled when the target arch is not `wasm32`.
    #[cfg(not(target_arch = "wasm32"))]
    pub fn new() -> Self {
        let object_stores: DashMap<String, Arc<dyn ObjectStore>> = DashMap::new();
        object_stores.insert("file://".to_string(), Arc::new(LocalFileSystem::new()));
        Self { object_stores }
    }

    /// Creates a registry without any backend registered.
    ///
    /// This constructor is only compiled when the target arch is `wasm32`,
    /// where the `LocalFileSystem` import is compiled out.
    #[cfg(target_arch = "wasm32")]
    pub fn new() -> Self {
        // Build the struct directly instead of going through `Self::default()`.
        // NOTE(review): the `Default` impl for this type is not fully visible
        // here; if it delegates to `Self::new()`, calling `Self::default()`
        // from this function would recurse without bound. Constructing the
        // empty map here is correct either way.
        let object_stores: DashMap<String, Arc<dyn ObjectStore>> = DashMap::new();
        Self { object_stores }
    }
}

///
/// Stores are registered based on the scheme, host and port of the provided URL
/// with a [`LocalFileSystem::new`] automatically registered for `file://`
/// with a [`LocalFileSystem::new`] automatically registered for `file://` (if the
/// target arch is not `wasm32`).
///
/// For example:
///
Expand Down
15 changes: 12 additions & 3 deletions datafusion/physical-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ name = "datafusion_physical_plan"
path = "src/lib.rs"

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
Expand All @@ -54,10 +56,17 @@ once_cell = "1.18.0"
parking_lot = { workspace = true }
pin-project-lite = "^0.2.7"
rand = { workspace = true }
tokio = { version = "1.28", features = ["sync", "fs", "parking_lot"] }
tokio = { version = "1.28", features = ["sync"] }
uuid = { version = "^1.2", features = ["v4"] }

[dev-dependencies]
rstest = { workspace = true }
termtree = "0.4.1"
tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
tokio = { version = "1.28", features = [
"macros",
"rt",
"rt-multi-thread",
"sync",
"fs",
"parking_lot",
] }
2 changes: 2 additions & 0 deletions datafusion/substrait/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ object_store = { workspace = true }
prost = "0.12"
prost-types = "0.12"
substrait = "0.22.1"

[dev-dependencies]
tokio = "1.17"
Comment on lines +40 to 41
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved this to dev-dependencies in passing. Note that datafusion-substrait and datafusion-proto are not supported yet.


[features]
Expand Down
5 changes: 4 additions & 1 deletion datafusion/wasmtest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ authors = { workspace = true }
rust-version = "1.70"

[lib]
crate-type = ["cdylib", "rlib",]
crate-type = ["cdylib", "rlib"]

[dependencies]

Expand All @@ -37,11 +37,14 @@ crate-type = ["cdylib", "rlib",]
# all the `std::fmt` and `std::panicking` infrastructure, so isn't great for
# code size when deploying.
console_error_panic_hook = { version = "0.1.1", optional = true }
datafusion = { path = "../core", default-features = false }

datafusion-common = { workspace = true }
datafusion-execution = { workspace = true }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I double-checked, and this addition means that the wasm CI job now checks these crates:
https://github.com/apache/arrow-datafusion/blob/8353a2ca2fd1f0ed5fc764b7463dfbcaa033ceef/.github/workflows/rust.yml#L192-L207

Here is the example CI run from this PR showing the new crates building in wasm https://github.com/apache/arrow-datafusion/actions/runs/7504130188/job/20430447784?pr=8843#step:6:194

datafusion-expr = { workspace = true }
datafusion-optimizer = { workspace = true }
datafusion-physical-expr = { workspace = true }
datafusion-physical-plan = { workspace = true }
datafusion-sql = { workspace = true }

# getrandom must be compiled with js feature
Expand Down
3 changes: 3 additions & 0 deletions datafusion/wasmtest/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,13 @@ Then open http://localhost:8080/ in a web browser and check the console to see t

The following DataFusion crates are verified to work in a wasm-pack environment using the default `wasm32-unknown-unknown` target:

- `datafusion` (datafusion-core) with default-features disabled to remove `bzip2-sys` from `async-compression`
- `datafusion-common` with default-features disabled to remove the `parquet` dependency (see below)
- `datafusion-expr`
- `datafusion-execution`
- `datafusion-optimizer`
- `datafusion-physical-expr`
- `datafusion-physical-plan`
- `datafusion-sql`

The difficulty with getting the remaining DataFusion crates compiled to WASM is that they have non-optional dependencies on the [`parquet`](https://docs.rs/crate/parquet/) crate with its default features enabled. Several of the default parquet crate features require native dependencies that are not compatible with WASM, in particular the `lz4` and `zstd` features. If we can arrange our feature flags to make it possible to depend on parquet with these features disabled, then it should be possible to compile the core `datafusion` crate to WASM with its default features enabled as well.