diff --git a/Cargo.lock b/Cargo.lock index a30d0b4a7bd4..1cca34e1899e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1809,7 +1809,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -1879,7 +1879,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "datafusion", @@ -1903,7 +1903,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "async-trait", @@ -1927,7 +1927,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "async-trait", @@ -1949,7 +1949,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "assert_cmd", @@ -1978,7 +1978,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "46.0.1" +version = "47.0.0" dependencies = [ "ahash 0.8.11", "apache-avro", @@ -2005,7 +2005,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "46.0.1" +version = "47.0.0" dependencies = [ "futures", "log", @@ -2014,7 +2014,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "async-compression", @@ -2049,7 +2049,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "46.0.1" +version = "47.0.0" dependencies = [ "apache-avro", "arrow", @@ -2074,7 +2074,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "async-trait", @@ -2097,7 +2097,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "async-trait", @@ -2120,7 +2120,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" 
-version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "async-trait", @@ -2150,11 +2150,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "46.0.1" +version = "47.0.0" [[package]] name = "datafusion-examples" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2184,7 +2184,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "chrono", @@ -2202,7 +2202,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "chrono", @@ -2223,7 +2223,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2234,7 +2234,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "46.0.1" +version = "47.0.0" dependencies = [ "abi_stable", "arrow", @@ -2253,7 +2253,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2282,7 +2282,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "46.0.1" +version = "47.0.0" dependencies = [ "ahash 0.8.11", "arrow", @@ -2303,7 +2303,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "46.0.1" +version = "47.0.0" dependencies = [ "ahash 0.8.11", "arrow", @@ -2316,7 +2316,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2337,7 +2337,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "async-trait", @@ -2351,7 +2351,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2367,7 +2367,7 
@@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "46.0.1" +version = "47.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2375,7 +2375,7 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "46.0.1" +version = "47.0.0" dependencies = [ "datafusion-expr", "quote", @@ -2384,7 +2384,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "async-trait", @@ -2409,7 +2409,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "46.0.1" +version = "47.0.0" dependencies = [ "ahash 0.8.11", "arrow", @@ -2434,7 +2434,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "46.0.1" +version = "47.0.0" dependencies = [ "ahash 0.8.11", "arrow", @@ -2446,7 +2446,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2465,7 +2465,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "46.0.1" +version = "47.0.0" dependencies = [ "ahash 0.8.11", "arrow", @@ -2501,7 +2501,7 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "chrono", @@ -2524,7 +2524,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2537,7 +2537,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "async-trait", @@ -2559,7 +2559,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2583,7 +2583,7 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "46.0.1" +version = "47.0.0" dependencies = [ "arrow", 
"async-trait", @@ -2614,7 +2614,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "46.0.1" +version = "47.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2634,7 +2634,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "46.0.1" +version = "47.0.0" dependencies = [ "chrono", "console_error_panic_hook", diff --git a/Cargo.toml b/Cargo.toml index 10d5af82feaa..4f8cfa8baa87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,7 +75,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.82.0" # Define DataFusion version -version = "46.0.1" +version = "47.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -107,38 +107,38 @@ chrono = { version = "0.4.38", default-features = false } criterion = "0.5.1" ctor = "0.2.9" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "46.0.1", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "46.0.1" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "46.0.1" } -datafusion-common = { path = "datafusion/common", version = "46.0.1", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "46.0.1" } -datafusion-datasource = { path = "datafusion/datasource", version = "46.0.1", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "46.0.1", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "46.0.1", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "46.0.1", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "46.0.1", default-features = false } -datafusion-doc = { path = "datafusion/doc", 
version = "46.0.1" } -datafusion-execution = { path = "datafusion/execution", version = "46.0.1" } -datafusion-expr = { path = "datafusion/expr", version = "46.0.1" } -datafusion-expr-common = { path = "datafusion/expr-common", version = "46.0.1" } -datafusion-ffi = { path = "datafusion/ffi", version = "46.0.1" } -datafusion-functions = { path = "datafusion/functions", version = "46.0.1" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "46.0.1" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "46.0.1" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "46.0.1" } -datafusion-functions-table = { path = "datafusion/functions-table", version = "46.0.1" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "46.0.1" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "46.0.1" } -datafusion-macros = { path = "datafusion/macros", version = "46.0.1" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "46.0.1", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "46.0.1", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "46.0.1", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "46.0.1" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "46.0.1" } -datafusion-proto = { path = "datafusion/proto", version = "46.0.1" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "46.0.1" } -datafusion-session = { path = "datafusion/session", version = "46.0.1" } -datafusion-sql = { path = "datafusion/sql", version = "46.0.1" } +datafusion = { path = "datafusion/core", version = "47.0.0", default-features = false } +datafusion-catalog = { path = 
"datafusion/catalog", version = "47.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "47.0.0" } +datafusion-common = { path = "datafusion/common", version = "47.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "47.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "47.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "47.0.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "47.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "47.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "47.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "47.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "47.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "47.0.0" } +datafusion-expr-common = { path = "datafusion/expr-common", version = "47.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "47.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "47.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "47.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "47.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "47.0.0" } +datafusion-functions-table = { path = "datafusion/functions-table", version = "47.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "47.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "47.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "47.0.0" } 
+datafusion-optimizer = { path = "datafusion/optimizer", version = "47.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "47.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "47.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "47.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "47.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "47.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "47.0.0" } +datafusion-session = { path = "datafusion/session", version = "47.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "47.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/dev/changelog/47.0.0.md b/dev/changelog/47.0.0.md new file mode 100644 index 000000000000..68dd5978e41c --- /dev/null +++ b/dev/changelog/47.0.0.md @@ -0,0 +1,503 @@ + + +# Apache DataFusion 47.0.0 Changelog + +This release consists of 362 commits from 95 contributors. See credits at the end of this changelog for more information. 
+ +**Breaking changes:** + +- chore: cleanup deprecated API since `version <= 40` [#15027](https://github.com/apache/datafusion/pull/15027) (qazxcdswe123) +- fix: mark ScalarUDFImpl::invoke_batch as deprecated [#15049](https://github.com/apache/datafusion/pull/15049) (Blizzara) +- feat: support customize metadata in alias for dataframe api [#15120](https://github.com/apache/datafusion/pull/15120) (chenkovsky) +- Refactor: add `FileGroup` structure for `Vec` [#15379](https://github.com/apache/datafusion/pull/15379) (xudong963) +- Change default `EXPLAIN` format in `datafusion-cli` to `tree` format [#15427](https://github.com/apache/datafusion/pull/15427) (alamb) +- Support computing statistics for FileGroup [#15432](https://github.com/apache/datafusion/pull/15432) (xudong963) +- Remove redundant statistics from FileScanConfig [#14955](https://github.com/apache/datafusion/pull/14955) (Standing-Man) +- parquet reader: move pruning predicate creation from ParquetSource to ParquetOpener [#15561](https://github.com/apache/datafusion/pull/15561) (adriangb) +- feat: Add unique id for every memory consumer [#15613](https://github.com/apache/datafusion/pull/15613) (EmilyMatt) + +**Performance related:** + +- Fix sequential metadata fetching in ListingTable causing high latency [#14918](https://github.com/apache/datafusion/pull/14918) (geoffreyclaude) +- Implement GroupsAccumulator for min/max Duration [#15322](https://github.com/apache/datafusion/pull/15322) (shruti2522) +- [Minor] Remove/reorder logical plan rules [#15421](https://github.com/apache/datafusion/pull/15421) (Dandandan) +- Improve performance of `first_value` by implementing special `GroupsAccumulator` [#15266](https://github.com/apache/datafusion/pull/15266) (UBarney) +- perf: unwrap cast for comparing ints =/!= strings [#15110](https://github.com/apache/datafusion/pull/15110) (alan910127) +- Improve performance sort TPCH q3 with Utf8Vew ( Sort-preserving mergi… 
[#15447](https://github.com/apache/datafusion/pull/15447) (zhuqi-lucas) +- perf: Reuse row converter during sort [#15302](https://github.com/apache/datafusion/pull/15302) (2010YOUY01) +- perf: Add TopK benchmarks as variation over the `sort_tpch` benchmarks [#15560](https://github.com/apache/datafusion/pull/15560) (geoffreyclaude) +- Perf: remove `clone` on `uninitiated_partitions` in SortPreservingMergeStream [#15562](https://github.com/apache/datafusion/pull/15562) (rluvaton) +- Add short circuit evaluation for `AND` and `OR` [#15462](https://github.com/apache/datafusion/pull/15462) (acking-you) +- perf: Introduce sort prefix computation for early TopK exit optimization on partially sorted input (10x speedup on top10 bench) [#15563](https://github.com/apache/datafusion/pull/15563) (geoffreyclaude) +- Improve performance of `last_value` by implementing special `GroupsAccumulator` [#15542](https://github.com/apache/datafusion/pull/15542) (UBarney) +- Enhance: simplify `x=x` --> `x IS NOT NULL OR NULL` [#15589](https://github.com/apache/datafusion/pull/15589) (ding-young) + +**Implemented enhancements:** + +- feat: Add `tree` / pretty explain mode [#14677](https://github.com/apache/datafusion/pull/14677) (irenjj) +- feat: Add `array_max` function support [#14470](https://github.com/apache/datafusion/pull/14470) (erenavsarogullari) +- feat: implement tree explain for `ProjectionExec` [#15082](https://github.com/apache/datafusion/pull/15082) (Standing-Man) +- feat: support ApproxDistinct with utf8view [#15200](https://github.com/apache/datafusion/pull/15200) (zhuqi-lucas) +- feat: Attach `Diagnostic` to more than one column errors in scalar_subquery and in_subquery [#15143](https://github.com/apache/datafusion/pull/15143) (changsun20) +- feat: topk functionality for aggregates should support utf8view and largeutf8 [#15152](https://github.com/apache/datafusion/pull/15152) (zhuqi-lucas) +- feat: Native support utf8view for regex string operators 
[#15275](https://github.com/apache/datafusion/pull/15275) (zhuqi-lucas) +- feat: introduce `JoinSetTracer` trait for tracing context propagation in spawned tasks [#14547](https://github.com/apache/datafusion/pull/14547) (geoffreyclaude) +- feat: Support serde for JsonSource PhysicalPlan [#15311](https://github.com/apache/datafusion/pull/15311) (westhide) +- feat: Support serde for FileScanConfig `batch_size` [#15335](https://github.com/apache/datafusion/pull/15335) (westhide) +- feat: simplify regex wildcard pattern [#15299](https://github.com/apache/datafusion/pull/15299) (waynexia) +- feat: Add union_by_name, union_by_name_distinct to DataFrame api [#15489](https://github.com/apache/datafusion/pull/15489) (Omega359) +- feat: Add config `max_temp_directory_size` to limit max disk usage for spilling queries [#15520](https://github.com/apache/datafusion/pull/15520) (2010YOUY01) +- feat: Add tracing regression tests [#15673](https://github.com/apache/datafusion/pull/15673) (geoffreyclaude) + +**Fixed bugs:** + +- fix: External sort failing on an edge case [#15017](https://github.com/apache/datafusion/pull/15017) (2010YOUY01) +- fix: graceful NULL and type error handling in array functions [#14737](https://github.com/apache/datafusion/pull/14737) (alan910127) +- fix: Support datatype cast for insert api same as insert into sql [#15091](https://github.com/apache/datafusion/pull/15091) (zhuqi-lucas) +- fix: unparse for subqueryalias [#15068](https://github.com/apache/datafusion/pull/15068) (chenkovsky) +- fix: date_trunc bench broken by #15049 [#15169](https://github.com/apache/datafusion/pull/15169) (Blizzara) +- fix: compound_field_access doesn't identifier qualifier. 
[#15153](https://github.com/apache/datafusion/pull/15153) (chenkovsky) +- fix: unparsing left/ right semi/mark join [#15212](https://github.com/apache/datafusion/pull/15212) (chenkovsky) +- fix: handle duplicate WindowFunction expressions in Substrait consumer [#15211](https://github.com/apache/datafusion/pull/15211) (Blizzara) +- fix: write hive partitions for any int/uint/float [#15337](https://github.com/apache/datafusion/pull/15337) (christophermcdermott) +- fix: `core_expressions` feature flag broken, move `overlay` into `core` functions [#15217](https://github.com/apache/datafusion/pull/15217) (shruti2522) +- fix: Redundant files spilled during external sort + introduce `SpillManager` [#15355](https://github.com/apache/datafusion/pull/15355) (2010YOUY01) +- fix: typo of DropFunction [#15434](https://github.com/apache/datafusion/pull/15434) (chenkovsky) +- fix: Unconditionally wrap UNION BY NAME input nodes w/ `Projection` [#15242](https://github.com/apache/datafusion/pull/15242) (rkrishn7) +- fix: the average time for clickbench query compute should use new vec to make it compute for each query [#15472](https://github.com/apache/datafusion/pull/15472) (zhuqi-lucas) +- fix: Assertion fail in external sort [#15469](https://github.com/apache/datafusion/pull/15469) (2010YOUY01) +- fix: aggregation corner case [#15457](https://github.com/apache/datafusion/pull/15457) (chenkovsky) +- fix: update group by columns for merge phase after spill [#15531](https://github.com/apache/datafusion/pull/15531) (rluvaton) +- fix: Queries similar to `count-bug` produce incorrect results [#15281](https://github.com/apache/datafusion/pull/15281) (suibianwanwank) +- fix: ffi aggregation [#15576](https://github.com/apache/datafusion/pull/15576) (chenkovsky) +- fix: nested window function [#15033](https://github.com/apache/datafusion/pull/15033) (chenkovsky) +- fix: dictionary encoded column to partition column casting bug [#15652](https://github.com/apache/datafusion/pull/15652) 
(haruband) +- fix: recursion protection for physical plan node [#15600](https://github.com/apache/datafusion/pull/15600) (chenkovsky) +- fix: add map coercion for binary ops [#15551](https://github.com/apache/datafusion/pull/15551) (alexwilcoxson-rel) +- fix: Rewrite `date_trunc` and `from_unixtime` for the SQLite unparser [#15630](https://github.com/apache/datafusion/pull/15630) (peasee) +- fix(substrait): fix regressed edge case in renaming inner struct fields [#15634](https://github.com/apache/datafusion/pull/15634) (Blizzara) +- fix: normalize window ident [#15639](https://github.com/apache/datafusion/pull/15639) (chenkovsky) + +**Documentation updates:** + +- MINOR fix(docs): set the proper link for dev-env setup in contrib guide [#14960](https://github.com/apache/datafusion/pull/14960) (clflushopt) +- Add Upgrade Guide for DataFusion 46.0.0 [#14891](https://github.com/apache/datafusion/pull/14891) (alamb) +- Improve `SessionStateBuilder::new` documentation [#14980](https://github.com/apache/datafusion/pull/14980) (alamb) +- Minor: Replace Star and Fork buttons in docs with static versions [#14988](https://github.com/apache/datafusion/pull/14988) (amoeba) +- Fix documentation warnings and error if anymore occur [#14952](https://github.com/apache/datafusion/pull/14952) (AmosAidoo) +- docs: Improve docs on AggregateFunctionExpr construction [#15044](https://github.com/apache/datafusion/pull/15044) (ctsk) +- Minor: More comment to aggregation fuzzer [#15048](https://github.com/apache/datafusion/pull/15048) (2010YOUY01) +- Improve benchmark documentation [#15054](https://github.com/apache/datafusion/pull/15054) (carols10cents) +- doc: update RecordBatchReceiverStreamBuilder::spawn_blocking task behaviour [#14995](https://github.com/apache/datafusion/pull/14995) (shruti2522) +- doc: Correct benchmark command [#15094](https://github.com/apache/datafusion/pull/15094) (qazxcdswe123) +- Add `insta` / snapshot testing to CLI & set up AWS mock 
[#13672](https://github.com/apache/datafusion/pull/13672) (blaginin) +- Config: Add support default sql varchar to view types [#15104](https://github.com/apache/datafusion/pull/15104) (zhuqi-lucas) +- Support `EXPLAIN ... FORMAT ...` [#15166](https://github.com/apache/datafusion/pull/15166) (alamb) +- Update version to 46.0.1, add CHANGELOG (#15243) [#15244](https://github.com/apache/datafusion/pull/15244) (xudong963) +- docs: update documentation for Final GroupBy in accumulator.rs [#15279](https://github.com/apache/datafusion/pull/15279) (qazxcdswe123) +- minor: fix `data/sqlite` link [#15286](https://github.com/apache/datafusion/pull/15286) (sdht0) +- Add upgrade notes for array signatures [#15237](https://github.com/apache/datafusion/pull/15237) (jkosh44) +- Add doc for the `statistics_from_parquet_meta_calc method` [#15330](https://github.com/apache/datafusion/pull/15330) (xudong963) +- added explanation for Schema and DFSchema to documentation [#15329](https://github.com/apache/datafusion/pull/15329) (Jiashu-Hu) +- Documentation: Plan custom expressions [#15353](https://github.com/apache/datafusion/pull/15353) (Jiashu-Hu) +- Update concepts-readings-events.md [#15440](https://github.com/apache/datafusion/pull/15440) (berkaysynnada) +- Add support for DISTINCT + ORDER BY in `ARRAY_AGG` [#14413](https://github.com/apache/datafusion/pull/14413) (gabotechs) +- Update the copyright year [#15453](https://github.com/apache/datafusion/pull/15453) (omkenge) +- Docs: Formatting and Added Extra resources [#15450](https://github.com/apache/datafusion/pull/15450) (2SpaceMasterRace) +- Add documentation for `Run extended tests` command [#15463](https://github.com/apache/datafusion/pull/15463) (alamb) +- bench: Document how to use cross platform Samply profiler [#15481](https://github.com/apache/datafusion/pull/15481) (comphead) +- Update user guide to note decimal is not experimental anymore [#15515](https://github.com/apache/datafusion/pull/15515) (Jiashu-Hu) +- 
datafusion-cli: document reading partitioned parquet [#15505](https://github.com/apache/datafusion/pull/15505) (marvelshan) +- Update concepts-readings-events.md [#15541](https://github.com/apache/datafusion/pull/15541) (oznur-synnada) +- Add documentation example for `AggregateExprBuilder` [#15504](https://github.com/apache/datafusion/pull/15504) (Shreyaskr1409) +- Docs : Added Sql examples for window Functions : `nth_val` , etc [#15555](https://github.com/apache/datafusion/pull/15555) (Adez017) +- Add disk usage limit configuration to datafusion-cli [#15586](https://github.com/apache/datafusion/pull/15586) (jsai28) +- Bug fix : fix the bug in docs in 'cum_dist()' Example [#15618](https://github.com/apache/datafusion/pull/15618) (Adez017) +- Make tree the Default EXPLAIN Format and Reorder Documentation Sections [#15706](https://github.com/apache/datafusion/pull/15706) (kosiew) +- Add coerce int96 option for Parquet to support different TimeUnits, test int96_from_spark.parquet from parquet-testing [#15537](https://github.com/apache/datafusion/pull/15537) (mbutrovich) +- STRING_AGG missing functionality [#14412](https://github.com/apache/datafusion/pull/14412) (gabotechs) +- doc : update RepartitionExec display tree [#15710](https://github.com/apache/datafusion/pull/15710) (getChan) + +**Other:** + +- Improve documentation for `DataSourceExec`, `FileScanConfig`, `DataSource` etc [#14941](https://github.com/apache/datafusion/pull/14941) (alamb) +- Do not swap with projection when file is partitioned [#14956](https://github.com/apache/datafusion/pull/14956) (blaginin) +- Minor: Add more projection pushdown tests, clarify comments [#14963](https://github.com/apache/datafusion/pull/14963) (alamb) +- Update labeler components [#14942](https://github.com/apache/datafusion/pull/14942) (alamb) +- Deprecate `Expr::Wildcard` [#14959](https://github.com/apache/datafusion/pull/14959) (linhr) +- Minor: use FileScanConfig builder API in some tests 
[#14938](https://github.com/apache/datafusion/pull/14938) (alamb) +- Minor: improve documentation of `AggregateMode` [#14946](https://github.com/apache/datafusion/pull/14946) (alamb) +- chore(deps): bump thiserror from 2.0.11 to 2.0.12 [#14971](https://github.com/apache/datafusion/pull/14971) (dependabot[bot]) +- chore(deps): bump pyo3 from 0.23.4 to 0.23.5 [#14972](https://github.com/apache/datafusion/pull/14972) (dependabot[bot]) +- chore(deps): bump async-trait from 0.1.86 to 0.1.87 [#14973](https://github.com/apache/datafusion/pull/14973) (dependabot[bot]) +- Fix verification script and extended tests due to `rustup` changes [#14990](https://github.com/apache/datafusion/pull/14990) (alamb) +- Split out avro, parquet, json and csv into individual crates [#14951](https://github.com/apache/datafusion/pull/14951) (AdamGS) +- Minor: Add `backtrace` feature in datafusion-cli [#14997](https://github.com/apache/datafusion/pull/14997) (2010YOUY01) +- chore: Update `SessionStateBuilder::with_default_features` does not replace existing features [#14935](https://github.com/apache/datafusion/pull/14935) (irenjj) +- Make `create_ordering` pub and add doc for it [#14996](https://github.com/apache/datafusion/pull/14996) (xudong963) +- Simplify Between expression to Eq [#14994](https://github.com/apache/datafusion/pull/14994) (jayzhan211) +- Count wildcard alias [#14927](https://github.com/apache/datafusion/pull/14927) (jayzhan211) +- replace TypeSignature::String with TypeSignature::Coercible [#14917](https://github.com/apache/datafusion/pull/14917) (zjregee) +- Minor: Add indentation to EnforceDistribution test plans. 
[#15007](https://github.com/apache/datafusion/pull/15007) (wiedld) +- Minor: add method `SessionStateBuilder::new_with_default_features()` [#14998](https://github.com/apache/datafusion/pull/14998) (shruti2522) +- Implement `tree` explain for FilterExec [#15001](https://github.com/apache/datafusion/pull/15001) (alamb) +- Unparser add `AtArrow` and `ArrowAt` conversion to BinaryOperator [#14968](https://github.com/apache/datafusion/pull/14968) (cetra3) +- Add dependency checks to verify-release-candidate script [#15009](https://github.com/apache/datafusion/pull/15009) (waynexia) +- Fix: to_char Function Now Correctly Handles DATE Values in DataFusion [#14970](https://github.com/apache/datafusion/pull/14970) (kosiew) +- Make Substrait Schema Structs always non-nullable [#15011](https://github.com/apache/datafusion/pull/15011) (amoeba) +- Adjust physical optimizer rule order, put `ProjectionPushdown` at last [#15040](https://github.com/apache/datafusion/pull/15040) (xudong963) +- Move `UnwrapCastInComparison` into `Simplifier` [#15012](https://github.com/apache/datafusion/pull/15012) (jayzhan211) +- chore(deps): bump aws-config from 1.5.17 to 1.5.18 [#15041](https://github.com/apache/datafusion/pull/15041) (dependabot[bot]) +- chore(deps): bump bytes from 1.10.0 to 1.10.1 [#15042](https://github.com/apache/datafusion/pull/15042) (dependabot[bot]) +- Minor: Deprecate `ScalarValue::raw_data` [#15016](https://github.com/apache/datafusion/pull/15016) (qazxcdswe123) +- Implement tree explain for `DataSourceExec` [#15029](https://github.com/apache/datafusion/pull/15029) (alamb) +- Refactor test suite in EnforceDistribution, to use standard test config. 
[#15010](https://github.com/apache/datafusion/pull/15010) (wiedld) +- Update ring to v0.17.13 [#15063](https://github.com/apache/datafusion/pull/15063) (alamb) +- Remove deprecated function `OptimizerRule::try_optimize` [#15051](https://github.com/apache/datafusion/pull/15051) (qazxcdswe123) +- Minor: fix CI to make the sqllogic testing result consistent [#15059](https://github.com/apache/datafusion/pull/15059) (zhuqi-lucas) +- Refactor SortPushdown using the standard top-down visitor and using `EquivalenceProperties` [#14821](https://github.com/apache/datafusion/pull/14821) (wiedld) +- Improve explain tree formatting for longer lines / word wrap [#15031](https://github.com/apache/datafusion/pull/15031) (irenjj) +- chore(deps): bump sqllogictest from 0.27.2 to 0.28.0 [#15060](https://github.com/apache/datafusion/pull/15060) (dependabot[bot]) +- chore(deps): bump async-compression from 0.4.18 to 0.4.19 [#15061](https://github.com/apache/datafusion/pull/15061) (dependabot[bot]) +- Handle columns in with_new_exprs with a Join [#15055](https://github.com/apache/datafusion/pull/15055) (delamarch3) +- Minor: Improve documentation of `need_handle_count_bug` [#15050](https://github.com/apache/datafusion/pull/15050) (suibianwanwank) +- Implement `tree` explain for `HashJoinExec` [#15079](https://github.com/apache/datafusion/pull/15079) (irenjj) +- Implement tree explain for PartialSortExec [#15066](https://github.com/apache/datafusion/pull/15066) (irenjj) +- Implement `tree` explain for `SortExec` [#15077](https://github.com/apache/datafusion/pull/15077) (irenjj) +- Minor: final `46.0.0` release tweaks: changelog + instructions [#15073](https://github.com/apache/datafusion/pull/15073) (alamb) +- Implement tree explain for `NestedLoopJoinExec`, `CrossJoinExec`, `So… [#15081](https://github.com/apache/datafusion/pull/15081) (irenjj) +- Implement `tree` explain for `BoundedWindowAggExec` and `WindowAggExec` [#15084](https://github.com/apache/datafusion/pull/15084) (irenjj) +- 
implement tree rendering for StreamingTableExec [#15085](https://github.com/apache/datafusion/pull/15085) (Standing-Man) +- chore(deps): bump semver from 1.0.25 to 1.0.26 [#15116](https://github.com/apache/datafusion/pull/15116) (dependabot[bot]) +- chore(deps): bump clap from 4.5.30 to 4.5.31 [#15115](https://github.com/apache/datafusion/pull/15115) (dependabot[bot]) +- implement tree explain for GlobalLimitExec [#15100](https://github.com/apache/datafusion/pull/15100) (zjregee) +- Minor: Cleanup useless/duplicated code in gen tools [#15113](https://github.com/apache/datafusion/pull/15113) (lewiszlw) +- Refactor EnforceDistribution test cases to demonstrate dependencies across optimizer runs. [#15074](https://github.com/apache/datafusion/pull/15074) (wiedld) +- Improve parsing `extra_info` in tree explain [#15125](https://github.com/apache/datafusion/pull/15125) (irenjj) +- Add tests for simplification and coercion of `SessionContext::create_physical_expr` [#15034](https://github.com/apache/datafusion/pull/15034) (alamb) +- Minor: Fix invalid query in test [#15131](https://github.com/apache/datafusion/pull/15131) (alamb) +- Do not display logical_plan in explain `tree` mode 🧹 [#15132](https://github.com/apache/datafusion/pull/15132) (alamb) +- Substrait support for propagating TableScan.filters to Substrait ReadRel.filter [#14194](https://github.com/apache/datafusion/pull/14194) (jamxia155) +- Fix wasm32 build on version 46 [#15102](https://github.com/apache/datafusion/pull/15102) (XiangpengHao) +- Fix broken `serde` feature [#15124](https://github.com/apache/datafusion/pull/15124) (vadimpiven) +- chore(deps): bump tempfile from 3.17.1 to 3.18.0 [#15146](https://github.com/apache/datafusion/pull/15146) (dependabot[bot]) +- chore(deps): bump syn from 2.0.98 to 2.0.100 [#15147](https://github.com/apache/datafusion/pull/15147) (dependabot[bot]) +- Implement tree explain for AggregateExec [#15103](https://github.com/apache/datafusion/pull/15103) (zebsme) +- Implement 
tree explain for `RepartitionExec` and `WorkTableExec` [#15137](https://github.com/apache/datafusion/pull/15137) (Standing-Man) +- Expand wildcard to actual expressions in `prepare_select_exprs` [#15090](https://github.com/apache/datafusion/pull/15090) (jayzhan211) +- fixed PushDownFilter bug [15047] [#15142](https://github.com/apache/datafusion/pull/15142) (Jiashu-Hu) +- Bump `env_logger` from `0.11.6` to `0.11.7` [#15148](https://github.com/apache/datafusion/pull/15148) (mbrobbel) +- Minor: fix extend sqllogical consistent with main test [#15145](https://github.com/apache/datafusion/pull/15145) (zhuqi-lucas) +- Implement tree rendering for `SortPreservingMergeExec` [#15140](https://github.com/apache/datafusion/pull/15140) (Standing-Man) +- Remove expand wildcard rule [#15170](https://github.com/apache/datafusion/pull/15170) (jayzhan211) +- chore: remove ScalarUDFImpl::return_type_from_exprs [#15130](https://github.com/apache/datafusion/pull/15130) (Blizzara) +- chore(deps): bump libc from 0.2.170 to 0.2.171 [#15176](https://github.com/apache/datafusion/pull/15176) (dependabot[bot]) +- chore(deps): bump serde_json from 1.0.139 to 1.0.140 [#15175](https://github.com/apache/datafusion/pull/15175) (dependabot[bot]) +- chore(deps): bump substrait from 0.53.2 to 0.54.0 [#15043](https://github.com/apache/datafusion/pull/15043) (dependabot[bot]) +- Minor: split EXPLAIN and ANALYZE planning into different functions [#15188](https://github.com/apache/datafusion/pull/15188) (alamb) +- Implement `tree` explain for `JsonSink` [#15185](https://github.com/apache/datafusion/pull/15185) (irenjj) +- Split out `datafusion-substrait` and `datafusion-proto` CI feature checks, increase coverage [#15156](https://github.com/apache/datafusion/pull/15156) (alamb) +- Remove unused wildcard expanding methods [#15180](https://github.com/apache/datafusion/pull/15180) (goldmedal) +- #15108 issue: "Non Panic Task error" is not an internal error 
[#15109](https://github.com/apache/datafusion/pull/15109) (Satyam018) +- Implement tree explain for LazyMemoryExec [#15187](https://github.com/apache/datafusion/pull/15187) (zebsme) +- implement tree explain for CoalesceBatchesExec [#15194](https://github.com/apache/datafusion/pull/15194) (Standing-Man) +- Implement `tree` explain for `CsvSink` [#15204](https://github.com/apache/datafusion/pull/15204) (irenjj) +- chore(deps): bump blake3 from 1.6.0 to 1.6.1 [#15198](https://github.com/apache/datafusion/pull/15198) (dependabot[bot]) +- chore(deps): bump clap from 4.5.31 to 4.5.32 [#15199](https://github.com/apache/datafusion/pull/15199) (dependabot[bot]) +- chore(deps): bump serde from 1.0.218 to 1.0.219 [#15197](https://github.com/apache/datafusion/pull/15197) (dependabot[bot]) +- Fix datafusion proto crate `json` feature [#15172](https://github.com/apache/datafusion/pull/15172) (Owen-CH-Leung) +- Add blog link to `EquivalenceProperties` docs [#15215](https://github.com/apache/datafusion/pull/15215) (alamb) +- Minor: split datafusion-cli testing into its own CI job [#15075](https://github.com/apache/datafusion/pull/15075) (alamb) +- Implement tree explain for InterleaveExec [#15219](https://github.com/apache/datafusion/pull/15219) (zebsme) +- Move catalog_common out of core [#15193](https://github.com/apache/datafusion/pull/15193) (logan-keede) +- chore(deps): bump tokio-util from 0.7.13 to 0.7.14 [#15223](https://github.com/apache/datafusion/pull/15223) (dependabot[bot]) +- chore(deps): bump aws-config from 1.5.18 to 1.6.0 [#15222](https://github.com/apache/datafusion/pull/15222) (dependabot[bot]) +- chore(deps): bump bzip2 from 0.5.1 to 0.5.2 [#15221](https://github.com/apache/datafusion/pull/15221) (dependabot[bot]) +- Document guidelines for physical operator yielding [#15030](https://github.com/apache/datafusion/pull/15030) (carols10cents) +- Implement `tree` explain for `ArrowFileSink`, fix original URL 
[#15206](https://github.com/apache/datafusion/pull/15206) (irenjj) +- Implement tree explain for `LocalLimitExec` [#15232](https://github.com/apache/datafusion/pull/15232) (shruti2522) +- Use insta for `DataFrame` tests [#15165](https://github.com/apache/datafusion/pull/15165) (blaginin) +- Re-enable github discussion [#15241](https://github.com/apache/datafusion/pull/15241) (2010YOUY01) +- Minor: exclude datafusion-cli testing for mac [#15240](https://github.com/apache/datafusion/pull/15240) (zhuqi-lucas) +- Implement tree explain for CoalescePartitionsExec [#15225](https://github.com/apache/datafusion/pull/15225) (Shreyaskr1409) +- Enable `used_underscore_binding` clippy lint [#15189](https://github.com/apache/datafusion/pull/15189) (Shreyaskr1409) +- Simpler to see expressions in explain `tree` mode [#15163](https://github.com/apache/datafusion/pull/15163) (irenjj) +- Fix invalid schema for unions in ViewTables [#15135](https://github.com/apache/datafusion/pull/15135) (Friede80) +- Make `ListingTableUrl::try_new` public [#15250](https://github.com/apache/datafusion/pull/15250) (linhr) +- Fix wildcard dataframe case [#15230](https://github.com/apache/datafusion/pull/15230) (jayzhan211) +- Simplify the printing of all plans containing `expr` in `tree` mode [#15249](https://github.com/apache/datafusion/pull/15249) (irenjj) +- Support utf8view datatype for window [#15257](https://github.com/apache/datafusion/pull/15257) (zhuqi-lucas) +- chore: remove deprecated variants of UDF's invoke (invoke, invoke_no_args, invoke_batch) [#15123](https://github.com/apache/datafusion/pull/15123) (Blizzara) +- Improve feature flag CI coverage `datafusion` and `datafusion-functions` [#15203](https://github.com/apache/datafusion/pull/15203) (alamb) +- Add debug logging for default catalog overwrite in SessionState build [#15251](https://github.com/apache/datafusion/pull/15251) (byte-sourcerer) +- Implement tree explain for PlaceholderRowExec 
[#15270](https://github.com/apache/datafusion/pull/15270) (zebsme) +- Implement tree explain for UnionExec [#15278](https://github.com/apache/datafusion/pull/15278) (zebsme) +- Migrate dataframe tests to `insta` [#15262](https://github.com/apache/datafusion/pull/15262) (jsai28) +- Minor: consistently apply `clippy::clone_on_ref_ptr` in all crates [#15284](https://github.com/apache/datafusion/pull/15284) (alamb) +- chore(deps): bump async-trait from 0.1.87 to 0.1.88 [#15294](https://github.com/apache/datafusion/pull/15294) (dependabot[bot]) +- chore(deps): bump uuid from 1.15.1 to 1.16.0 [#15292](https://github.com/apache/datafusion/pull/15292) (dependabot[bot]) +- Add CatalogProvider and SchemaProvider to FFI Crate [#15280](https://github.com/apache/datafusion/pull/15280) (timsaucer) +- Refactor file schema type coercions [#15268](https://github.com/apache/datafusion/pull/15268) (xudong963) +- chore(deps): bump rust_decimal from 1.36.0 to 1.37.0 [#15293](https://github.com/apache/datafusion/pull/15293) (dependabot[bot]) +- chore: Attach Diagnostic to "incompatible type in unary expression" error [#15209](https://github.com/apache/datafusion/pull/15209) (onlyjackfrost) +- Support logic optimize rule to pass the case that Utf8view datatype combined with Utf8 datatype [#15239](https://github.com/apache/datafusion/pull/15239) (zhuqi-lucas) +- Migrate user_defined tests to insta [#15255](https://github.com/apache/datafusion/pull/15255) (shruti2522) +- Remove inline table scan analyzer rule [#15201](https://github.com/apache/datafusion/pull/15201) (jayzhan211) +- CI Red: Fix union in view table test [#15300](https://github.com/apache/datafusion/pull/15300) (jayzhan211) +- refactor: Move view and stream from `datasource` to `catalog`, deprecate `View::try_new` [#15260](https://github.com/apache/datafusion/pull/15260) (logan-keede) +- chore(deps): bump substrait from 0.54.0 to 0.55.0 [#15305](https://github.com/apache/datafusion/pull/15305) (dependabot[bot]) +- 
chore(deps): bump half from 2.4.1 to 2.5.0 [#15303](https://github.com/apache/datafusion/pull/15303) (dependabot[bot]) +- chore(deps): bump mimalloc from 0.1.43 to 0.1.44 [#15304](https://github.com/apache/datafusion/pull/15304) (dependabot[bot]) +- Fix predicate pushdown for custom SchemaAdapters [#15263](https://github.com/apache/datafusion/pull/15263) (adriangb) +- Fix extended tests by restore datafusion-testing submodule [#15318](https://github.com/apache/datafusion/pull/15318) (alamb) +- Support Duration in min/max agg functions [#15310](https://github.com/apache/datafusion/pull/15310) (svranesevic) +- Migrate tests to insta [#15288](https://github.com/apache/datafusion/pull/15288) (jsai28) +- chore(deps): bump quote from 1.0.38 to 1.0.40 [#15332](https://github.com/apache/datafusion/pull/15332) (dependabot[bot]) +- chore(deps): bump blake3 from 1.6.1 to 1.7.0 [#15331](https://github.com/apache/datafusion/pull/15331) (dependabot[bot]) +- Simplify display format of `AggregateFunctionExpr`, add `Expr::sql_name` [#15253](https://github.com/apache/datafusion/pull/15253) (irenjj) +- chore(deps): bump indexmap from 2.7.1 to 2.8.0 [#15333](https://github.com/apache/datafusion/pull/15333) (dependabot[bot]) +- chore(deps): bump tokio from 1.43.0 to 1.44.1 [#15347](https://github.com/apache/datafusion/pull/15347) (dependabot[bot]) +- chore(deps): bump tempfile from 3.18.0 to 3.19.1 [#15346](https://github.com/apache/datafusion/pull/15346) (dependabot[bot]) +- Minor: Keep debug symbols for `release-nonlto` build [#15350](https://github.com/apache/datafusion/pull/15350) (2010YOUY01) +- Use `any` instead of `for_each` [#15289](https://github.com/apache/datafusion/pull/15289) (xudong963) +- refactor: move `CteWorkTable`, `default_table_source` a bunch of files out of core [#15316](https://github.com/apache/datafusion/pull/15316) (logan-keede) +- Fix empty aggregation function count() in Substrait [#15345](https://github.com/apache/datafusion/pull/15345) (gabotechs) +- 
Improved error for expand wildcard rule [#15287](https://github.com/apache/datafusion/pull/15287) (Jiashu-Hu) +- Added tests with are writing into parquet files in memory for issue #… [#15325](https://github.com/apache/datafusion/pull/15325) (pranavJibhakate) +- Migrate physical plan tests to `insta` (Part-1) [#15313](https://github.com/apache/datafusion/pull/15313) (Shreyaskr1409) +- Fix array_has_all and array_has_any with empty array [#15039](https://github.com/apache/datafusion/pull/15039) (LuQQiu) +- Update datafusion-testing pin to fix extended tests [#15368](https://github.com/apache/datafusion/pull/15368) (alamb) +- chore(deps): Update sqlparser to 0.55.0 [#15183](https://github.com/apache/datafusion/pull/15183) (PokIsemaine) +- Only unnest source for `EmptyRelation` [#15159](https://github.com/apache/datafusion/pull/15159) (blaginin) +- chore(deps): bump rust_decimal from 1.37.0 to 1.37.1 [#15378](https://github.com/apache/datafusion/pull/15378) (dependabot[bot]) +- chore(deps): bump chrono-tz from 0.10.1 to 0.10.2 [#15377](https://github.com/apache/datafusion/pull/15377) (dependabot[bot]) +- remove the duplicate test for unparser [#15385](https://github.com/apache/datafusion/pull/15385) (goldmedal) +- Minor: add average time for clickbench benchmark query [#15381](https://github.com/apache/datafusion/pull/15381) (zhuqi-lucas) +- include some BinaryOperator from sqlparser [#15327](https://github.com/apache/datafusion/pull/15327) (waynexia) +- Add "end to end parquet reading test" for WASM [#15362](https://github.com/apache/datafusion/pull/15362) (jsai28) +- Migrate physical plan tests to `insta` (Part-2) [#15364](https://github.com/apache/datafusion/pull/15364) (Shreyaskr1409) +- Migrate physical plan tests to `insta` (Part-3 / Final) [#15399](https://github.com/apache/datafusion/pull/15399) (Shreyaskr1409) +- Restore lazy evaluation of fallible CASE [#15390](https://github.com/apache/datafusion/pull/15390) (findepi) +- chore(deps): bump log from 0.4.26 to 
0.4.27 [#15410](https://github.com/apache/datafusion/pull/15410) (dependabot[bot]) +- chore(deps): bump chrono-tz from 0.10.2 to 0.10.3 [#15412](https://github.com/apache/datafusion/pull/15412) (dependabot[bot]) +- Perf: Support Utf8View datatype single column comparisons for SortPreservingMergeStream [#15348](https://github.com/apache/datafusion/pull/15348) (zhuqi-lucas) +- Enforce JOIN plan to require condition [#15334](https://github.com/apache/datafusion/pull/15334) (goldmedal) +- Fix type coercion for unsigned and signed integers (`Int64` vs `UInt64`, etc) [#15341](https://github.com/apache/datafusion/pull/15341) (Omega359) +- simplify `array_has` UDF to `InList` expr when haystack is constant [#15354](https://github.com/apache/datafusion/pull/15354) (davidhewitt) +- Move `DataSink` to `datasource` and add session crate [#15371](https://github.com/apache/datafusion/pull/15371) (jayzhan-synnada) +- refactor: SpillManager into a separate file [#15407](https://github.com/apache/datafusion/pull/15407) (Weijun-H) +- Always use `PartitionMode::Auto` in planner [#15339](https://github.com/apache/datafusion/pull/15339) (Dandandan) +- Fix link to Volcano paper [#15437](https://github.com/apache/datafusion/pull/15437) (JackKelly) +- minor: Add new crates to labeler [#15426](https://github.com/apache/datafusion/pull/15426) (logan-keede) +- refactor: Use SpillManager for all spilling scenarios [#15405](https://github.com/apache/datafusion/pull/15405) (2010YOUY01) +- refactor(hash_join): Move JoinHashMap to separate mod [#15419](https://github.com/apache/datafusion/pull/15419) (ctsk) +- Migrate datasource tests to insta [#15258](https://github.com/apache/datafusion/pull/15258) (shruti2522) +- Add `downcast_to_source` method for `DataSourceExec` [#15416](https://github.com/apache/datafusion/pull/15416) (xudong963) +- refactor: use TypeSignature::Coercible for crypto functions [#14826](https://github.com/apache/datafusion/pull/14826) (Chen-Yuan-Lai) +- Minor: fix doc for 
`FileGroupPartitioner` [#15448](https://github.com/apache/datafusion/pull/15448) (xudong963) +- chore(deps): bump clap from 4.5.32 to 4.5.34 [#15452](https://github.com/apache/datafusion/pull/15452) (dependabot[bot]) +- Fix roundtrip bug with empty projection in DataSourceExec [#15449](https://github.com/apache/datafusion/pull/15449) (XiangpengHao) +- Triggering extended tests through PR comment: `Run extended tests` [#15101](https://github.com/apache/datafusion/pull/15101) (danila-b) +- Use `equals_datatype` to compare type when type coercion [#15366](https://github.com/apache/datafusion/pull/15366) (goldmedal) +- Fix no effect metrics bug in ParquetSource [#15460](https://github.com/apache/datafusion/pull/15460) (XiangpengHao) +- chore(deps): bump aws-config from 1.6.0 to 1.6.1 [#15470](https://github.com/apache/datafusion/pull/15470) (dependabot[bot]) +- minor: Allow to run TPCH bench for a specific query [#15467](https://github.com/apache/datafusion/pull/15467) (comphead) +- Migrate substrait tests to insta, part1 [#15444](https://github.com/apache/datafusion/pull/15444) (qstommyshu) +- Add `FileScanConfigBuilder` [#15352](https://github.com/apache/datafusion/pull/15352) (blaginin) +- Update ClickBench queries to avoid to_timestamp_seconds [#15475](https://github.com/apache/datafusion/pull/15475) (acking-you) +- Remove CoalescePartitions insertion from HashJoinExec [#15476](https://github.com/apache/datafusion/pull/15476) (ctsk) +- Migrate-substrait-tests-to-insta, part2 [#15480](https://github.com/apache/datafusion/pull/15480) (qstommyshu) +- Revert #15476 to fix the datafusion-examples CI fail [#15496](https://github.com/apache/datafusion/pull/15496) (goldmedal) +- Migrate datafusion/sql tests to insta, part1 [#15497](https://github.com/apache/datafusion/pull/15497) (qstommyshu) +- Allow type coercion of zero input arrays to nullary [#15487](https://github.com/apache/datafusion/pull/15487) (timsaucer) +- Decimal type support for `to_timestamp` 
[#15486](https://github.com/apache/datafusion/pull/15486) (jatin510) +- refactor: Move `Memtable` to catalog [#15459](https://github.com/apache/datafusion/pull/15459) (logan-keede) +- Migrate optimizer tests to insta [#15446](https://github.com/apache/datafusion/pull/15446) (qstommyshu) +- FIX : some benchmarks are failing [#15367](https://github.com/apache/datafusion/pull/15367) (getChan) +- Add query to extended clickbench suite for "complex filter" [#15500](https://github.com/apache/datafusion/pull/15500) (acking-you) +- Extract tokio runtime creation from hot loop in benchmarks [#15508](https://github.com/apache/datafusion/pull/15508) (Omega359) +- chore(deps): bump blake3 from 1.7.0 to 1.8.0 [#15502](https://github.com/apache/datafusion/pull/15502) (dependabot[bot]) +- Minor: clone and debug for FileSinkConfig [#15516](https://github.com/apache/datafusion/pull/15516) (jayzhan211) +- use state machine to refactor the `get_files_with_limit` method [#15521](https://github.com/apache/datafusion/pull/15521) (xudong963) +- Migrate `datafusion/sql` tests to insta, part2 [#15499](https://github.com/apache/datafusion/pull/15499) (qstommyshu) +- Disable sccache action to fix gh cache issue [#15536](https://github.com/apache/datafusion/pull/15536) (Omega359) +- refactor: Cleanup unused `fetch` field inside `ExternalSorter` [#15525](https://github.com/apache/datafusion/pull/15525) (2010YOUY01) +- Fix duplicate unqualified Field name (schema error) on join queries [#15438](https://github.com/apache/datafusion/pull/15438) (LiaCastaneda) +- Add utf8view benchmark for aggregate topk [#15518](https://github.com/apache/datafusion/pull/15518) (zhuqi-lucas) +- ArraySort: support structs [#15527](https://github.com/apache/datafusion/pull/15527) (cht42) +- Migrate datafusion/sql tests to insta, part3 [#15533](https://github.com/apache/datafusion/pull/15533) (qstommyshu) +- Migrate datafusion/sql tests to insta, part4 [#15548](https://github.com/apache/datafusion/pull/15548) 
(qstommyshu) +- Add topk information into tree explain plans [#15547](https://github.com/apache/datafusion/pull/15547) (kumarlokesh) +- Minor: add Arc for statistics in FileGroup [#15564](https://github.com/apache/datafusion/pull/15564) (xudong963) +- Test: configuration fuzzer for (external) sort queries [#15501](https://github.com/apache/datafusion/pull/15501) (2010YOUY01) +- minor: Organize fields inside SortMergeJoinStream [#15557](https://github.com/apache/datafusion/pull/15557) (suibianwanwank) +- Minor: rm session downcast [#15575](https://github.com/apache/datafusion/pull/15575) (jayzhan211) +- Migrate datafusion/sql tests to insta, part5 [#15567](https://github.com/apache/datafusion/pull/15567) (qstommyshu) +- Add SQL logic tests for compound field access in JOIN conditions [#15556](https://github.com/apache/datafusion/pull/15556) (kosiew) +- Run audit CI check on all pushes to main [#15572](https://github.com/apache/datafusion/pull/15572) (alamb) +- Introduce load-balanced `split_groups_by_statistics` method [#15473](https://github.com/apache/datafusion/pull/15473) (xudong963) +- chore: update clickbench [#15574](https://github.com/apache/datafusion/pull/15574) (chenkovsky) +- Improve spill performance: Disable re-validation of spilled files [#15454](https://github.com/apache/datafusion/pull/15454) (zebsme) +- chore: rm duplicated `JoinOn` type [#15590](https://github.com/apache/datafusion/pull/15590) (jayzhan211) +- Chore: Call arrow's methods `row_count` and `skipped_row_count` [#15587](https://github.com/apache/datafusion/pull/15587) (jayzhan211) +- Actually run wasm test in ci [#15595](https://github.com/apache/datafusion/pull/15595) (XiangpengHao) +- Migrate datafusion/sql tests to insta, part6 [#15578](https://github.com/apache/datafusion/pull/15578) (qstommyshu) +- Add test case for new casting feature from date to tz-aware timestamps [#15609](https://github.com/apache/datafusion/pull/15609) (friendlymatthew) +- Remove CoalescePartitions insertion 
from Joins [#15570](https://github.com/apache/datafusion/pull/15570) (ctsk) +- fix doc and broken api [#15602](https://github.com/apache/datafusion/pull/15602) (logan-keede) +- Migrate datafusion/sql tests to insta, part7 [#15621](https://github.com/apache/datafusion/pull/15621) (qstommyshu) +- ignore security_audit CI check proc-macro-error warning [#15626](https://github.com/apache/datafusion/pull/15626) (Jiashu-Hu) +- chore(deps): bump tokio from 1.44.1 to 1.44.2 [#15627](https://github.com/apache/datafusion/pull/15627) (dependabot[bot]) +- Upgrade toolchain to Rust-1.86 [#15625](https://github.com/apache/datafusion/pull/15625) (jsai28) +- chore(deps): bump bigdecimal from 0.4.7 to 0.4.8 [#15523](https://github.com/apache/datafusion/pull/15523) (dependabot[bot]) +- chore(deps): bump the arrow-parquet group across 1 directory with 7 updates [#15593](https://github.com/apache/datafusion/pull/15593) (dependabot[bot]) +- chore: improve RepartitionExec display tree [#15606](https://github.com/apache/datafusion/pull/15606) (getChan) +- Move back schema not matching check and workaround [#15580](https://github.com/apache/datafusion/pull/15580) (LiaCastaneda) +- Minor: refine comments for statistics computation [#15647](https://github.com/apache/datafusion/pull/15647) (xudong963) +- Remove unneeded binary_op benchmarks [#15632](https://github.com/apache/datafusion/pull/15632) (alamb) +- chore(deps): bump blake3 from 1.8.0 to 1.8.1 [#15650](https://github.com/apache/datafusion/pull/15650) (dependabot[bot]) +- chore(deps): bump mimalloc from 0.1.44 to 0.1.46 [#15651](https://github.com/apache/datafusion/pull/15651) (dependabot[bot]) +- chore: avoid erroneous warning for FFI table operation (only not default value) [#15579](https://github.com/apache/datafusion/pull/15579) (chenkovsky) +- Update datafusion-testing pin (to fix extended test on main) [#15655](https://github.com/apache/datafusion/pull/15655) (alamb) +- Ignore false positive only_used_in_recursion Clippy warning 
[#15635](https://github.com/apache/datafusion/pull/15635) (DerGut) +- chore: Rename protobuf Java package [#15658](https://github.com/apache/datafusion/pull/15658) (andygrove) +- Remove redundant `Precision` combination code in favor of `Precision::min/max/add` [#15659](https://github.com/apache/datafusion/pull/15659) (alamb) +- Introduce DynamicFilterSource and DynamicPhysicalExpr [#15568](https://github.com/apache/datafusion/pull/15568) (adriangb) +- Public some projected methods in `FileScanConfig` [#15671](https://github.com/apache/datafusion/pull/15671) (xudong963) +- fix decimal precision issue in simplify expression optimize rule [#15588](https://github.com/apache/datafusion/pull/15588) (jayzhan211) +- Implement Future for SpawnedTask. [#15653](https://github.com/apache/datafusion/pull/15653) (ashdnazg) +- chore(deps): bump crossbeam-channel from 0.5.14 to 0.5.15 [#15674](https://github.com/apache/datafusion/pull/15674) (dependabot[bot]) +- chore(deps): bump clap from 4.5.34 to 4.5.35 [#15668](https://github.com/apache/datafusion/pull/15668) (dependabot[bot]) +- [Minor] Use interleave_record_batch in TopK implementation [#15677](https://github.com/apache/datafusion/pull/15677) (Dandandan) +- Consolidate statistics merging code (try 2) [#15661](https://github.com/apache/datafusion/pull/15661) (alamb) +- Add Table Functions to FFI Crate [#15581](https://github.com/apache/datafusion/pull/15581) (timsaucer) +- Remove waits from blocking threads reading spill files. 
[#15654](https://github.com/apache/datafusion/pull/15654) (ashdnazg) +- chore(deps): bump sysinfo from 0.33.1 to 0.34.2 [#15682](https://github.com/apache/datafusion/pull/15682) (dependabot[bot]) +- Minor: add order by arg for last value [#15695](https://github.com/apache/datafusion/pull/15695) (jayzhan211) +- Upgrade to arrow/parquet 55, and `object_store` to `0.12.0` and pyo3 to `0.24.0` [#15466](https://github.com/apache/datafusion/pull/15466) (alamb) +- tests: only refresh the minimum sysinfo in mem limit tests. [#15702](https://github.com/apache/datafusion/pull/15702) (ashdnazg) +- ci: fix workflow triggering extended tests from pr comments. [#15704](https://github.com/apache/datafusion/pull/15704) (ashdnazg) +- chore(deps): bump flate2 from 1.1.0 to 1.1.1 [#15703](https://github.com/apache/datafusion/pull/15703) (dependabot[bot]) +- Fix internal error in sort when hitting memory limit [#15692](https://github.com/apache/datafusion/pull/15692) (DerGut) +- Update checked in Cargo.lock file to get clean CI [#15725](https://github.com/apache/datafusion/pull/15725) (alamb) +- chore(deps): bump indexmap from 2.8.0 to 2.9.0 [#15732](https://github.com/apache/datafusion/pull/15732) (dependabot[bot]) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 48 dependabot[bot] + 34 Andrew Lamb + 15 Jay Zhan + 15 Qi Zhu + 15 irenjj + 15 xudong.w + 12 Chen Chongchen + 12 Yongting You + 10 Tommy shu + 7 Shruti Sharma + 6 Alan Tang + 6 Arttu + 6 Jiashu Hu + 6 Shreyas (Lua) + 6 logan-keede + 6 zeb + 5 Dmitrii Blaginin + 5 Geoffrey Claude + 5 Jax Liu + 5 YuNing Chen + 4 Bruce Ritchie + 4 Christian + 4 Eshed Schacham + 4 Xiangpeng Hao + 4 wiedld + 3 Adrian Garcia Badaracco + 3 Daniël Heres + 3 Gabriel + 3 LB7666 + 3 Namgung Chan + 3 Ruihang Xia + 3 Tim Saucer + 3 jsai28 + 3 kosiew + 3 suibianwanwan + 2 Bryce Mecum + 2 Carol (Nichols || Goulding) + 2 Heran Lin + 2 Jannik Steinmann + 2 Jyotir Sai + 2 Li-Lun Lin + 2 Lía Adriana + 2 Oleks V + 2 Raz Luvaton + 2 UBarney + 2 aditya singh rathore + 2 westhide + 2 zjregee + 1 @clflushopt + 1 Adam Gutglick + 1 Alex Huang + 1 Alex Wilcoxson + 1 Amos Aidoo + 1 Andy Grove + 1 Andy Yen + 1 Berkay Şahin + 1 Chang + 1 Danila Baklazhenko + 1 David Hewitt + 1 Emily Matheys + 1 Eren Avsarogullari + 1 Hari Varsha + 1 Ian Lai + 1 Jack Kelly + 1 Jagdish Parihar + 1 Joseph Koshakow + 1 Lokesh + 1 LuQQiu + 1 Matt Butrovich + 1 Matt Friede + 1 Matthew Kim + 1 Matthijs Brobbel + 1 Om Kenge + 1 Owen Leung + 1 Peter L + 1 Piotr Findeisen + 1 Rohan Krishnaswamy + 1 Satyam018 + 1 Sava Vranešević + 1 Siddhartha Sahu + 1 Sile Zhou + 1 Vadim Piven + 1 Zaki + 1 christophermcdermott + 1 cht42 + 1 cjw + 1 delamarch3 + 1 ding-young + 1 haruband + 1 jamxia155 + 1 oznur-synnada + 1 peasee + 1 pranavJibhakate + 1 张林伟 +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index a90da66e4b0b..7a46d59d893e 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -69,7 +69,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting max_statistics_size is deprecated, currently it is not being used | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 46.0.1 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 47.0.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.statistics_truncate_length | NULL | (writing) Sets statictics truncate length. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page |