diff --git a/Cargo.lock b/Cargo.lock index d22e95b1a736..d1967517ddd2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1730,7 +1730,6 @@ dependencies = [ "arrow", "arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", "bzip2 0.5.0", @@ -1759,7 +1758,6 @@ dependencies = [ "env_logger", "flate2", "futures", - "glob", "itertools 0.14.0", "log", "nix 0.29.0", @@ -1778,7 +1776,6 @@ dependencies = [ "tempfile", "test-utils", "tokio", - "tokio-util", "url", "uuid", "xz2", @@ -1834,6 +1831,8 @@ dependencies = [ "arrow-schema", "async-compression", "async-trait", + "bytes", + "bzip2 0.5.0", "chrono", "datafusion-catalog", "datafusion-common", @@ -1842,6 +1841,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "flate2", "futures", "glob", "itertools 0.14.0", @@ -1849,7 +1849,10 @@ dependencies = [ "object_store", "tempfile", "tokio", + "tokio-util", "url", + "xz2", + "zstd", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 56bc218f2706..1e35b7f42027 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -99,7 +99,7 @@ ctor = "0.2.9" dashmap = "6.0.1" datafusion = { path = "datafusion/core", version = "45.0.0", default-features = false } datafusion-catalog = { path = "datafusion/catalog", version = "45.0.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "45.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "45.0.0", default-features = false } datafusion-common = { path = "datafusion/common", version = "45.0.0", default-features = false } datafusion-common-runtime = { path = "datafusion/common-runtime", version = "45.0.0" } datafusion-doc = { path = "datafusion/doc", version = "45.0.0" } diff --git a/datafusion/catalog-listing/Cargo.toml b/datafusion/catalog-listing/Cargo.toml index 03132e7b7bb5..99e20aa8f07a 100644 --- a/datafusion/catalog-listing/Cargo.toml +++ b/datafusion/catalog-listing/Cargo.toml @@ -27,6 +27,10 @@ repository.workspace = true rust-version.workspace = true version.workspace = true +[features] +compression = ["async-compression", "xz2", "bzip2", "flate2", "zstd", "tokio-util"] +default = ["compression"] + [dependencies] arrow = { workspace = true } arrow-schema = { workspace = true } @@ -37,6 +41,8 @@ async-compression = { version = "0.4.0", features = [ "zstd", "tokio", ], optional = true } +bytes = { workspace = true } +bzip2 = { version = "0.5.0", optional = true } chrono = { workspace = true } datafusion-catalog = { workspace = true } datafusion-common = { workspace = true, features = ["object_store"] } @@ -45,17 +51,21 @@ datafusion-expr = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } datafusion-physical-plan = { workspace = true } +flate2 = { version = "1.0.24", optional = true } futures = { workspace = true } glob = "0.3.0" itertools = { workspace = true } log = { workspace = true } object_store = { workspace = true } +tokio = { workspace = true } +tokio-util = { version = "0.7.4", features = ["io"], optional = true } url = { workspace = true } +xz2 = { version = "0.1", optional = true, features = ["static"] } +zstd = { version = "0.13", optional = true, default-features = false } [dev-dependencies] async-trait = { workspace = true } tempfile = { workspace = true } -tokio = { workspace = true } [lints] workspace = true diff --git a/datafusion/core/src/datasource/file_format/file_compression_type.rs b/datafusion/catalog-listing/src/file_compression_type.rs similarity index 98% rename from datafusion/core/src/datasource/file_format/file_compression_type.rs rename to datafusion/catalog-listing/src/file_compression_type.rs index 6612de077988..7cc3142564e9 100644 --- a/datafusion/core/src/datasource/file_format/file_compression_type.rs +++ b/datafusion/catalog-listing/src/file_compression_type.rs @@ -19,7 +19,7 @@ use std::str::FromStr; -use crate::error::{DataFusionError, Result}; +use datafusion_common::error::{DataFusionError, Result}; use datafusion_common::parsers::CompressionTypeVariant::{self, *}; use datafusion_common::GetExt; @@ -254,8 +254,8 @@ pub trait FileTypeExt { mod tests { use std::str::FromStr; - use crate::datasource::file_format::file_compression_type::FileCompressionType; - use crate::error::DataFusionError; + use super::FileCompressionType; + use datafusion_common::error::DataFusionError; use bytes::Bytes; use futures::StreamExt; diff --git a/datafusion/catalog-listing/src/mod.rs b/datafusion/catalog-listing/src/mod.rs index 709fa88b5867..98c03253733e 100644 --- a/datafusion/catalog-listing/src/mod.rs +++ b/datafusion/catalog-listing/src/mod.rs @@ -18,6 +18,7 @@ //! A table that uses the `ObjectStore` listing capability //! to get the list of files to process. +pub mod file_compression_type; pub mod file_groups; pub mod helpers; pub mod url; diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index bbd999ffe98b..c3c764ab1435 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -43,7 +43,7 @@ array_expressions = ["nested_expressions"] # Used to enable the avro format avro = ["apache-avro", "num-traits", "datafusion-common/avro"] backtrace = ["datafusion-common/backtrace"] -compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"] +compression = ["xz2", "bzip2", "flate2", "zstd", "datafusion-catalog-listing/compression"] crypto_expressions = ["datafusion-functions/crypto_expressions"] datetime_expressions = ["datafusion-functions/datetime_expressions"] default = [ @@ -87,13 +87,6 @@ apache-avro = { version = "0.17", optional = true } arrow = { workspace = true } arrow-ipc = { workspace = true } arrow-schema = { workspace = true } -async-compression = { version = "0.4.0", features = [ - "bzip2", - "gzip", - "xz", - "zstd", - "tokio", -], optional = true } async-trait = { workspace = true } bytes = { workspace = true } bzip2 = { version = "0.5.0", optional = true } @@ -117,7 +110,6 @@ datafusion-physical-plan = { workspace = true } datafusion-sql = { workspace = true } flate2 = { version = "1.0.24", optional = true } futures = { workspace = true } -glob = "0.3.0" itertools = { workspace = true } log = { workspace = true } num-traits = { version = "0.2", optional = true } @@ -129,7 +121,6 @@ regex = { workspace = true } sqlparser = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true } -tokio-util = { version = "0.7.4", features = ["io"], optional = true } url = { workspace = true } uuid = { version = "1.7", features = ["v4", "js"] } xz2 = { version = "0.1", optional = true, features = ["static"] } diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 383d2b14b31c..90a09252e789 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -24,12 +24,12 @@ pub const DEFAULT_SCHEMA_INFER_MAX_RECORD: usize = 1000; pub mod arrow; pub mod avro; pub mod csv; -pub mod file_compression_type; pub mod json; pub mod options; #[cfg(feature = "parquet")] pub mod parquet; pub mod write; +pub use datafusion_catalog_listing::file_compression_type; use std::any::Any; use std::collections::{HashMap, VecDeque};