From d55cc20d9b97f1970606df96881c93e5c0479733 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Wed, 22 Nov 2023 16:52:16 +0000 Subject: [PATCH 1/2] Allow default value on lag/lead if they are coerceable to value Fixes #8307 --- .../physical-expr/src/window/lead_lag.rs | 29 +++++++++++++++---- datafusion/sqllogictest/test_files/window.slt | 21 ++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/datafusion/physical-expr/src/window/lead_lag.rs b/datafusion/physical-expr/src/window/lead_lag.rs index d22660d41ebd..b90361931e27 100644 --- a/datafusion/physical-expr/src/window/lead_lag.rs +++ b/datafusion/physical-expr/src/window/lead_lag.rs @@ -21,10 +21,11 @@ use crate::window::BuiltInWindowFunctionExpr; use crate::PhysicalExpr; use arrow::array::ArrayRef; -use arrow::compute::cast; +use arrow::compute::{cast, cast_with_options, CastOptions}; use arrow::datatypes::{DataType, Field}; use datafusion_common::ScalarValue; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::type_coercion::functions::can_coerce_from; use datafusion_expr::PartitionEvaluator; use std::any::Any; use std::cmp::min; @@ -235,11 +236,27 @@ fn get_default_value( default_value: Option<&ScalarValue>, dtype: &DataType, ) -> Result { - if let Some(value) = default_value { - if let ScalarValue::Int64(Some(val)) = value { - ScalarValue::try_from_string(val.to_string(), dtype) + if let Some(default_value) = default_value { + let default_value_type = default_value.data_type(); + if can_coerce_from(dtype, &default_value_type) + || (dtype.is_integer() && default_value_type.is_integer()) + { + ScalarValue::try_from_array( + &cast_with_options( + &default_value.to_array()?, + dtype, + &CastOptions { + safe: false, + format_options: Default::default(), + }, + )?, + 0, + ) } else { - internal_err!("Expects default value to have Int64 type") + Err(DataFusionError::Internal(format!( + "Cannot coerce default value {:?} {} to {:?}", + default_value, default_value_type, dtype + ))) } } else { Ok(ScalarValue::try_from(dtype)?) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 1ef0ba0d10e3..2573c06c5eaa 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1153,6 +1153,27 @@ SELECT 141680161 63044568 63044568 225513085 225513085 145294611 141047417 141047417 243203849 243203849 +# can coerce given default value to lag/lead value type +query PP +SELECT + LAG(c9::timestamp, 99999999, '2000-11-22T16:17:06.624Z') OVER(ORDER BY c9) as lag_default_coerced, + LEAD(c9::timestamp, 99999999, '2023-11-22T16:17:06.624Z') OVER(ORDER BY c9) as lead_default_coerced + FROM aggregate_test_100 + LIMIT 1 +---- +2000-11-22T16:17:06.624 2023-11-22T16:17:06.624 + +# cannot coerce given default value to lag value type +query error +SELECT + LAG(c9, 99999999, '2000-11-22T16:17:06.624Z') OVER(ORDER BY c9) + FROM aggregate_test_100 + LIMIT 1 +---- +DataFusion error: Internal error: Cannot coerce default value Utf8("2000-11-22T16:17:06.624Z") Utf8 to UInt64. +This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker + + #fn test_window_frame_first_value_last_value_aggregate query IIII SELECT From 39603fae829ddf226ded97a53e0780736ece175a Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 30 Nov 2023 08:36:19 +0000 Subject: [PATCH 2/2] use `comparison_coercion` and `internal_err!` --- datafusion/physical-expr/src/window/lead_lag.rs | 17 +++++++++-------- datafusion/sqllogictest/test_files/window.slt | 3 +-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/datafusion/physical-expr/src/window/lead_lag.rs b/datafusion/physical-expr/src/window/lead_lag.rs index b90361931e27..5cd11c039082 100644 --- a/datafusion/physical-expr/src/window/lead_lag.rs +++ b/datafusion/physical-expr/src/window/lead_lag.rs @@ -23,9 +23,9 @@ use crate::PhysicalExpr; use arrow::array::ArrayRef; use arrow::compute::{cast, cast_with_options, CastOptions}; use arrow::datatypes::{DataType, Field}; -use datafusion_common::ScalarValue; +use datafusion_common::{internal_err, ScalarValue}; use datafusion_common::{DataFusionError, Result}; -use datafusion_expr::type_coercion::functions::can_coerce_from; +use datafusion_expr::type_coercion::binary::comparison_coercion; use datafusion_expr::PartitionEvaluator; use std::any::Any; use std::cmp::min; @@ -238,9 +238,8 @@ fn get_default_value( ) -> Result { if let Some(default_value) = default_value { let default_value_type = default_value.data_type(); - if can_coerce_from(dtype, &default_value_type) - || (dtype.is_integer() && default_value_type.is_integer()) - { + + if comparison_coercion(&default_value_type, dtype).is_some() { ScalarValue::try_from_array( &cast_with_options( &default_value.to_array()?, @@ -253,10 +252,12 @@ fn get_default_value( 0, ) } else { - Err(DataFusionError::Internal(format!( + internal_err!( "Cannot coerce default value {:?} {} to {:?}", - default_value, default_value_type, dtype - ))) + default_value, + default_value_type, + dtype + ) } } else { Ok(ScalarValue::try_from(dtype)?) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 2573c06c5eaa..5a9b8382613f 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1170,8 +1170,7 @@ SELECT FROM aggregate_test_100 LIMIT 1 ---- -DataFusion error: Internal error: Cannot coerce default value Utf8("2000-11-22T16:17:06.624Z") Utf8 to UInt64. -This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker +DataFusion error: Arrow error: Cast error: Cannot cast string '2000-11-22T16:17:06.624Z' to value of UInt64 type #fn test_window_frame_first_value_last_value_aggregate