diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 5b0b69a85597..f4089c6a8cbe 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -55,6 +55,7 @@ apache-avro = { version = "0.17", default-features = false, features = [
 arrow = { workspace = true }
 arrow-ipc = { workspace = true }
 base64 = "0.22.1"
+chrono = { workspace = true }
 half = { workspace = true }
 hashbrown = { workspace = true }
 indexmap = { workspace = true }
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs
index 750b78d59e2e..f774f46b424d 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -52,13 +52,14 @@ use arrow::compute::kernels::{
 };
 use arrow::datatypes::{
     i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
-    Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
-    Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
-    IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
+    Date32Type, Field, Float32Type, Int16Type, Int32Type, Int64Type, Int8Type,
+    IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType,
+    TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
     TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type,
     UInt8Type, UnionFields, UnionMode, DECIMAL128_MAX_PRECISION,
 };
 use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
+use chrono::{Duration, NaiveDate};
 use half::f16;
 
 pub use struct_builder::ScalarStructBuilder;
@@ -3816,12 +3817,28 @@ impl fmt::Display for ScalarValue {
             ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
             ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
             ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
-            ScalarValue::Date32(e) => {
-                format_option!(f, e.map(|v| Date32Type::to_naive_date(v).to_string()))?
-            }
-            ScalarValue::Date64(e) => {
-                format_option!(f, e.map(|v| Date64Type::to_naive_date(v).to_string()))?
-            }
+            ScalarValue::Date32(e) => format_option!(
+                f,
+                e.map(|v| {
+                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+                    match epoch.checked_add_signed(Duration::try_days(v as i64).unwrap())
+                    {
+                        Some(date) => date.to_string(),
+                        None => "".to_string(),
+                    }
+                })
+            )?,
+            ScalarValue::Date64(e) => format_option!(
+                f,
+                e.map(|v| {
+                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+                    // `try_milliseconds` is `None` for `i64::MIN`, so never unwrap it
+                    Duration::try_milliseconds(v)
+                        .and_then(|d| epoch.checked_add_signed(d))
+                        .map(|date| date.to_string())
+                        .unwrap_or_default()
+                })
+            )?,
             ScalarValue::Time32Second(e) => format_option!(f, e)?,
             ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
             ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
@@ -7229,6 +7246,21 @@ mod tests {
         ");
     }
 
+    #[test]
+    fn test_display_date64_large_values() {
+        assert_eq!(
+            format!("{}", ScalarValue::Date64(Some(790179464505))),
+            "1995-01-15"
+        );
+        // This used to panic, see https://github.com/apache/arrow-rs/issues/7728
+        assert_eq!(
+            format!("{}", ScalarValue::Date64(Some(-790179464505600000))),
+            ""
+        );
+        // `i64::MIN` milliseconds is not representable as a `chrono::Duration`
+        assert_eq!(format!("{}", ScalarValue::Date64(Some(i64::MIN))), "");
+    }
+
     #[test]
     fn test_struct_display_null() {
         let fields = vec![Field::new("a", DataType::Int32, false)];
diff --git a/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs
index f8b0d5cd463d..1f47412caf2a 100644
--- a/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs
@@ -48,7 +48,6 @@ use super::record_batch_generator::{get_supported_types_columns, RecordBatchGene
 ///
 /// Now memory limiting is disabled by default. See TODOs in `SortQueryFuzzer`.
 #[tokio::test(flavor = "multi_thread")]
-#[ignore = "https://github.com/apache/datafusion/issues/16452"]
 async fn sort_query_fuzzer_runner() {
     let random_seed = std::time::SystemTime::now()
         .duration_since(std::time::UNIX_EPOCH)