diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 1027e97d061a..aa35b256825a 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -32,6 +32,12 @@ use datafusion_common::{ exec_datafusion_err, plan_datafusion_err, plan_err, DataFusionError, Result, }; +/// Returns true if this type is Decimal. +fn is_decimal(data_type: &DataType) -> bool { + use DataType::*; + matches!(data_type, Decimal128(_, _) | Decimal256(_, _)) +} + /// The type signature of an instantiation of binary operator expression such as /// `lhs + rhs` /// @@ -290,7 +296,8 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option equality is possible return Some(lhs_type.clone()); } - comparison_binary_numeric_coercion(lhs_type, rhs_type) + + numeric_coercion(lhs_type, rhs_type) .or_else(|| dictionary_coercion(lhs_type, rhs_type, true)) .or_else(|| temporal_coercion(lhs_type, rhs_type)) .or_else(|| string_coercion(lhs_type, rhs_type)) @@ -354,73 +361,144 @@ fn string_temporal_coercion( match_rule(lhs_type, rhs_type).or_else(|| match_rule(rhs_type, lhs_type)) } -/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation -/// where one both are numeric -fn comparison_binary_numeric_coercion( - lhs_type: &DataType, - rhs_type: &DataType, -) -> Option { +/// Decimal coercion rules for comparison operations, including comparison between decimal and non-decimal types. 
+fn binary_decimal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> { use arrow::datatypes::DataType::*; - if !lhs_type.is_numeric() || !rhs_type.is_numeric() { + + // At least one should be decimal + if !is_decimal(lhs_type) && !is_decimal(rhs_type) { return None; }; - // same type => all good - if lhs_type == rhs_type { - return Some(lhs_type.clone()); + match (lhs_type, rhs_type) { + // Prefer decimal data type over floating point for comparison operation + (Decimal128(_, _), Decimal128(_, _)) | (Decimal256(_, _), Decimal256(_, _)) => { + get_wider_decimal_type(lhs_type, rhs_type) + } + (decimal_type @ Decimal128(_, _), other_type) + | (other_type, decimal_type @ Decimal128(_, _)) + | (decimal_type @ Decimal256(_, _), other_type) + | (other_type, decimal_type @ Decimal256(_, _)) => { + get_comparison_common_decimal_type(decimal_type, other_type) + } + _ => None, } +} + +/// Coerce non decimal numeric types to a common type for the purposes of a comparison operation and math operation +/// +/// We tend to find the narrowest type that can represent both inputs if possible, +/// so the return type MAY not be the same as either input type. +/// +/// For example, `Int64` and `Float32` will coerce to `Float64`. +/// +/// Also, since there might not be a perfect type for both inputs, some data loss is expected. +/// For example, `UInt64` and `Float64` will coerce to `Float64`, so casting `UInt64` to `Float64` will lose data. 
+fn non_decimal_numeric_coercion( + lhs_type: &DataType, + rhs_type: &DataType, +) -> Option<DataType> { + use arrow::datatypes::DataType::*; // these are ordered from most informative to least informative so // that the coercion does not lose information via truncation match (lhs_type, rhs_type) { - // Prefer decimal data type over floating point for comparison operation - (Decimal128(_, _), Decimal128(_, _)) => { - get_wider_decimal_type(lhs_type, rhs_type) + // f64 + // Prefer f64 over u64 and i64, data lossy is expected + (Float64, _) | (_, Float64) => Some(Float64), + + // u64 + // Prefer f64 over u64, data lossy is expected + (UInt64, Float32) | (Float32, UInt64) | (UInt64, Float16) | (Float16, UInt64) => { + Some(Float64) } - (Decimal128(_, _), _) => get_comparison_common_decimal_type(lhs_type, rhs_type), - (_, Decimal128(_, _)) => get_comparison_common_decimal_type(rhs_type, lhs_type), - (Decimal256(_, _), Decimal256(_, _)) => { - get_wider_decimal_type(lhs_type, rhs_type) + // Prefer i64 over u64, data lossy is expected + (UInt64, data_type) | (data_type, UInt64) => { + if data_type.is_signed_integer() { + Some(Int64) + } else { + Some(UInt64) + } } - (Decimal256(_, _), _) => get_comparison_common_decimal_type(lhs_type, rhs_type), - (_, Decimal256(_, _)) => get_comparison_common_decimal_type(rhs_type, lhs_type), - (Float64, _) | (_, Float64) => Some(Float64), - (_, Float32) | (Float32, _) => Some(Float32), - // The following match arms encode the following logic: Given the two - // integral types, we choose the narrowest possible integral type that - // accommodates all values of both types. Note that some information - // loss is inevitable when we have a signed type and a `UInt64`, in - // which case we use `Int64`;i.e. the widest signed integral type. 
- (Int64, _) - | (_, Int64) - | (UInt64, Int8) - | (Int8, UInt64) - | (UInt64, Int16) - | (Int16, UInt64) - | (UInt64, Int32) - | (Int32, UInt64) - | (UInt32, Int8) - | (Int8, UInt32) - | (UInt32, Int16) - | (Int16, UInt32) - | (UInt32, Int32) - | (Int32, UInt32) => Some(Int64), - (UInt64, _) | (_, UInt64) => Some(UInt64), - (Int32, _) - | (_, Int32) - | (UInt16, Int16) - | (Int16, UInt16) - | (UInt16, Int8) - | (Int8, UInt16) => Some(Int32), - (UInt32, _) | (_, UInt32) => Some(UInt32), - (Int16, _) | (_, Int16) | (Int8, UInt8) | (UInt8, Int8) => Some(Int16), - (UInt16, _) | (_, UInt16) => Some(UInt16), - (Int8, _) | (_, Int8) => Some(Int8), - (UInt8, _) | (_, UInt8) => Some(UInt8), + + // i64 + // Prefer f64 over i64, data lossy is expected + (Int64, Float32) | (Float32, Int64) | (Int64, Float16) | (Float16, Int64) => { + Some(Float64) + } + (Int64, _) | (_, Int64) => Some(Int64), + + // f32 + // f32 is not guaranteed to be able to represent all i32 values + (Float32, UInt32) | (UInt32, Float32) | (Float32, Int32) | (Int32, Float32) => { + Some(Float64) + } + (Float32, _) | (_, Float32) => Some(Float32), + + // u32 + (UInt32, Float16) | (Float16, UInt32) => Some(Float64), + (UInt32, data_type) | (data_type, UInt32) => { + if data_type.is_signed_integer() { + Some(Int64) + } else { + Some(UInt32) + } + } + + // i32 + // f32 is not guaranteed to be able to represent all i32 values, so f64 is preferred + (Int32, Float16) | (Float16, Int32) => Some(Float64), + (Int32, _) | (_, Int32) => Some(Int32), + + // f16 + (Float16, UInt16) | (UInt16, Float16) | (Float16, Int16) | (Int16, Float16) => { + Some(Float32) + } + (Float16, _) | (_, Float16) => Some(Float16), + + // u16 + (UInt16, data_type) | (data_type, UInt16) => { + if data_type.is_signed_integer() { + Some(Int32) + } else { + Some(UInt16) + } + } + + // i16 + (Int16, _) | (_, Int16) => Some(Int16), + + // u8 + (UInt8, UInt8) => Some(UInt8), + (UInt8, Int8) | (Int8, UInt8) => Some(Int16), + + // i8 + (Int8, Int8) 
=> Some(Int8), + _ => None, } } +/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation +/// where both are numeric and the coerced type MAY not be the same as either input type. +fn numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> { + if !lhs_type.is_numeric() || !rhs_type.is_numeric() { + return None; + }; + + if is_decimal(lhs_type) || is_decimal(rhs_type) { + return binary_decimal_coercion(lhs_type, rhs_type); + }; + + non_decimal_numeric_coercion(lhs_type, rhs_type) +} + +/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation +/// where both are numeric and the coerced type SHOULD be one of the input types. +pub fn exact_numeric_coercion(_: &DataType, _: &DataType) -> Option<DataType> { + todo!("Implement this when we have a use case for it") +} + /// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of /// a comparison operation where one is a decimal fn get_comparison_common_decimal_type( @@ -575,17 +653,19 @@ fn mathematics_numerical_coercion( (_, Dictionary(_, value_type)) => { mathematics_numerical_coercion(lhs_type, value_type) } - (Float64, _) | (_, Float64) => Some(Float64), - (_, Float32) | (Float32, _) => Some(Float32), - (Int64, _) | (_, Int64) => Some(Int64), - (Int32, _) | (_, Int32) => Some(Int32), - (Int16, _) | (_, Int16) => Some(Int16), - (Int8, _) | (_, Int8) => Some(Int8), - (UInt64, _) | (_, UInt64) => Some(UInt64), - (UInt32, _) | (_, UInt32) => Some(UInt32), - (UInt16, _) | (_, UInt16) => Some(UInt16), - (UInt8, _) | (_, UInt8) => Some(UInt8), - _ => None, + _ => { + // `math_decimal_coercion` does not handle coercion between Decimal and Float and Uint. 
+ if is_decimal(lhs_type) && is_decimal(rhs_type) { + unreachable!("Should be handled in `math_decimal_coercion`") + } else if is_decimal(lhs_type) { + Some(rhs_type.to_owned()) + } else if is_decimal(rhs_type) { + Some(lhs_type.to_owned()) + } else { + // Both are non decimal numeric type + non_decimal_numeric_coercion(lhs_type, rhs_type) + } + } } } @@ -854,16 +934,6 @@ mod tests { use arrow::datatypes::DataType; use datafusion_common::{assert_contains, Result}; - #[test] - fn test_coercion_error() -> Result<()> { - let result_type = - get_input_types(&DataType::Float32, &Operator::Plus, &DataType::Utf8); - - let e = result_type.unwrap_err(); - assert_eq!(e.strip_backtrace(), "Error during planning: Cannot coerce arithmetic expression Float32 + Utf8 to valid types"); - Ok(()) - } - #[test] fn test_decimal_binary_comparison_coercion() -> Result<()> { let input_decimal = DataType::Decimal128(20, 3); @@ -1222,44 +1292,6 @@ mod tests { Ok(()) } - #[test] - fn test_type_coercion_arithmetic() -> Result<()> { - // integer - test_coercion_binary_rule!( - DataType::Int32, - DataType::UInt32, - Operator::Plus, - DataType::Int32 - ); - test_coercion_binary_rule!( - DataType::Int32, - DataType::UInt16, - Operator::Minus, - DataType::Int32 - ); - test_coercion_binary_rule!( - DataType::Int8, - DataType::Int64, - Operator::Multiply, - DataType::Int64 - ); - // float - test_coercion_binary_rule!( - DataType::Float32, - DataType::Int32, - Operator::Plus, - DataType::Float32 - ); - test_coercion_binary_rule!( - DataType::Float32, - DataType::Float64, - Operator::Multiply, - DataType::Float64 - ); - // TODO add other data type - Ok(()) - } - fn test_math_decimal_coercion_rule( lhs_type: DataType, rhs_type: DataType, @@ -1333,7 +1365,7 @@ mod tests { DataType::Float32, DataType::Int64, Operator::Eq, - DataType::Float32 + DataType::Float64 ); test_coercion_binary_rule!( DataType::Float32, diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs 
b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 7e6fb6b355ab..8e99bd7d27fe 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -415,7 +415,7 @@ mod tests { let expected = "Projection: COUNT(alias1) AS COUNT(DISTINCT Int32(2) * test.b) [COUNT(DISTINCT Int32(2) * test.b):Int64;N]\ \n Aggregate: groupBy=[[]], aggr=[[COUNT(alias1)]] [COUNT(alias1):Int64;N]\ - \n Aggregate: groupBy=[[Int32(2) * test.b AS alias1]], aggr=[[]] [alias1:Int32]\ + \n Aggregate: groupBy=[[Int32(2) * test.b AS alias1]], aggr=[[]] [alias1:Int64]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(&plan, expected) @@ -511,9 +511,9 @@ mod tests { .build()?; // Should work - let expected = "Projection: group_alias_0 AS test.a + Int32(1), COUNT(alias1) AS COUNT(DISTINCT test.c) [test.a + Int32(1):Int32, COUNT(DISTINCT test.c):Int64;N]\ - \n Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]] [group_alias_0:Int32, COUNT(alias1):Int64;N]\ - \n Aggregate: groupBy=[[test.a + Int32(1) AS group_alias_0, test.c AS alias1]], aggr=[[]] [group_alias_0:Int32, alias1:UInt32]\ + let expected = "Projection: group_alias_0 AS test.a + Int32(1), COUNT(alias1) AS COUNT(DISTINCT test.c) [test.a + Int32(1):Int64, COUNT(DISTINCT test.c):Int64;N]\ + \n Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]] [group_alias_0:Int64, COUNT(alias1):Int64;N]\ + \n Aggregate: groupBy=[[test.a + Int32(1) AS group_alias_0, test.c AS alias1]], aggr=[[]] [group_alias_0:Int64, alias1:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(&plan, expected) diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index e593b07361e2..12ae9effbed0 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -313,8 +313,8 @@ fn 
push_down_filter_groupby_expr_contains_alias() { let sql = "SELECT * FROM (SELECT (col_int32 + col_uint32) AS c, count(*) FROM test GROUP BY 1) where c > 3"; let plan = test_sql(sql).unwrap(); let expected = "Projection: test.col_int32 + test.col_uint32 AS c, COUNT(*)\ - \n Aggregate: groupBy=[[test.col_int32 + CAST(test.col_uint32 AS Int32)]], aggr=[[COUNT(UInt8(1)) AS COUNT(*)]]\ - \n Filter: test.col_int32 + CAST(test.col_uint32 AS Int32) > Int32(3)\ + \n Aggregate: groupBy=[[CAST(test.col_int32 AS Int64) + CAST(test.col_uint32 AS Int64)]], aggr=[[COUNT(UInt8(1)) AS COUNT(*)]]\ + \n Filter: CAST(test.col_int32 AS Int64) + CAST(test.col_uint32 AS Int64) > Int64(3)\ \n TableScan: test projection=[col_int32, col_uint32]"; assert_eq!(expected, format!("{plan:?}")); } diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 9c7fdd2e814b..c5ac61a26265 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -794,9 +794,9 @@ mod tests { DataType::UInt32, vec![1u32, 2u32], Operator::Plus, - Int32Array, - DataType::Int32, - [2i32, 4i32], + Int64Array, + DataType::Int64, + [2i64, 4i64], ); test_coercion!( Int32Array, diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 3b45d995e1a2..8e72c089e53f 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -654,15 +654,15 @@ select make_array(b, c), make_array(e, f) from values; query ? 
select make_array(a, b, c, d) from values; ---- -[1.0, 1.0, 2.0, 1.1] -[2.0, 3.0, 4.0, 2.2] -[3.0, 5.0, 6.0, 3.3] -[4.0, 7.0, 8.0, 4.4] +[1.0, 1.0, 2.0, 1.100000023841858] +[2.0, 3.0, 4.0, 2.200000047683716] +[3.0, 5.0, 6.0, 3.299999952316284] +[4.0, 7.0, 8.0, 4.400000095367432] [, 9.0, 10.0, 5.5] -[5.0, , 12.0, 6.6] -[6.0, 11.0, , 7.7] +[5.0, , 12.0, 6.599999904632568] +[6.0, 11.0, , 7.699999809265137] [7.0, 13.0, 14.0, ] -[8.0, 15.0, 16.0, 8.8] +[8.0, 15.0, 16.0, 8.800000190734863] # make_array with column of list query ?? @@ -2975,6 +2975,18 @@ select make_array(1.0, '2', null) ---- [1.0, 2, ] +# make_array scalar function #5 +query error +select + make_array(arrow_cast(1, 'Int8'), arrow_cast(18446744073709551610, 'UInt64')), + arrow_typeof(make_array(arrow_cast(1, 'Int8'), arrow_cast(18446744073709551610, 'UInt64'))) +; +---- +DataFusion error: Optimizer rule 'simplify_expressions' failed +caused by +Arrow error: Cast error: Can't cast value 18446744073709551610 to type Int64 + + ### FixedSizeListArray statement ok diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index b3597c664fbb..b5e859a6bf20 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1927,6 +1927,139 @@ B false C false D false + +# Test Binary Coercion + +## Numeric Coercion + +query TTTTTT +select + arrow_typeof(arrow_cast(1.2, 'Float64') + arrow_cast(3, 'UInt64')), + arrow_typeof(arrow_cast(1.2, 'Float64') + arrow_cast(-3, 'Int64')), + arrow_typeof(arrow_cast(64, 'UInt64') + arrow_cast(2.3, 'Float32')), + arrow_typeof(arrow_cast(-64, 'Int64') + arrow_cast(2.3, 'Float32')), + arrow_typeof(arrow_cast(32, 'UInt32') + arrow_cast(2.3, 'Float32')), + arrow_typeof(arrow_cast(-32, 'Int32') + arrow_cast(2.3, 'Float32')) +; +---- +Float64 Float64 Float64 Float64 Float64 Float64 + +query TTTT +select + arrow_typeof(arrow_cast(1, 'UInt64') + arrow_cast(3, 'UInt64')), + 
arrow_typeof(arrow_cast(2, 'UInt32') + arrow_cast(3, 'UInt64')), + arrow_typeof(arrow_cast(3, 'UInt16') + arrow_cast(3, 'UInt64')), + arrow_typeof(arrow_cast(4, 'UInt8') + arrow_cast(3, 'UInt64')) +; +---- +UInt64 UInt64 UInt64 UInt64 + +query TTTTTTTTTTTTTT +select + arrow_typeof(arrow_cast(-1, 'Int64') + arrow_cast(3, 'UInt64')), + arrow_typeof(arrow_cast(-2, 'Int32') + arrow_cast(3, 'UInt64')), + arrow_typeof(arrow_cast(-3, 'Int16') + arrow_cast(3, 'UInt64')), + arrow_typeof(arrow_cast(-4, 'Int8') + arrow_cast(3, 'UInt64')), + arrow_typeof(arrow_cast(-64, 'Int64') + arrow_cast(3, 'Int64')), + arrow_typeof(arrow_cast(32, 'UInt32') + arrow_cast(3, 'Int64')), + arrow_typeof(arrow_cast(-32, 'Int32') + arrow_cast(3, 'Int64')), + arrow_typeof(arrow_cast(16, 'UInt16') + arrow_cast(3, 'Int64')), + arrow_typeof(arrow_cast(-16, 'Int16') + arrow_cast(3, 'Int64')), + arrow_typeof(arrow_cast(8, 'UInt8') + arrow_cast(3, 'Int64')), + arrow_typeof(arrow_cast(-8, 'Int8') + arrow_cast(3, 'Int64')), + arrow_typeof(arrow_cast(-9, 'Int32') + arrow_cast(3, 'UInt32')), + arrow_typeof(arrow_cast(-10, 'Int16') + arrow_cast(3, 'UInt32')), + arrow_typeof(arrow_cast(-11, 'Int8') + arrow_cast(3, 'UInt32')) +; +---- +Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 + +query TTTTT +select + arrow_typeof(arrow_cast(1.2, 'Float32') + arrow_cast(3.4, 'Float32')), + arrow_typeof(arrow_cast(16, 'UInt16') + arrow_cast(3.4, 'Float32')), + arrow_typeof(arrow_cast(-16, 'Int16') + arrow_cast(3.4, 'Float32')), + arrow_typeof(arrow_cast(8, 'UInt8') + arrow_cast(3.4, 'Float32')), + arrow_typeof(arrow_cast(-8, 'Int8') + arrow_cast(3.4, 'Float32')) +; +---- +Float32 Float32 Float32 Float32 Float32 + +query TTT +select + arrow_typeof(arrow_cast(1, 'UInt32') + arrow_cast(2, 'UInt32')), + arrow_typeof(arrow_cast(2, 'UInt32') + arrow_cast(2, 'UInt16')), + arrow_typeof(arrow_cast(3, 'UInt32') + arrow_cast(2, 'UInt8')) +; +---- +UInt32 UInt32 UInt32 + +query TTTTT +select + 
arrow_typeof(arrow_cast(1, 'Int32') + arrow_cast(2, 'Int32')), + arrow_typeof(arrow_cast(2, 'Int32') + arrow_cast(2, 'Int16')), + arrow_typeof(arrow_cast(3, 'Int32') + arrow_cast(2, 'Int8')), + arrow_typeof(arrow_cast(4, 'UInt16') + arrow_cast(2, 'Int16')), + arrow_typeof(arrow_cast(5, 'UInt16') + arrow_cast(2, 'Int8')) +; +---- +Int32 Int32 Int32 Int32 Int32 + +query TT +select + arrow_typeof(arrow_cast(1, 'UInt16') + arrow_cast(2, 'UInt16')), + arrow_typeof(arrow_cast(2, 'UInt8') + arrow_cast(2, 'UInt16')) +; +---- +UInt16 UInt16 + +query TTTT +select + arrow_typeof(arrow_cast(1, 'Int16') + arrow_cast(2, 'Int16')), + arrow_typeof(arrow_cast(2, 'Int16') + arrow_cast(2, 'UInt8')), + arrow_typeof(arrow_cast(3, 'Int16') + arrow_cast(2, 'Int8')), + arrow_typeof(arrow_cast(4, 'UInt8') + arrow_cast(2, 'Int8')) +; +---- +Int16 Int16 Int16 Int16 + +query T +select arrow_typeof(arrow_cast(1, 'UInt8') + arrow_cast(2, 'UInt8')); +---- +UInt8 + +query T +select arrow_typeof(arrow_cast(1, 'Int8') + arrow_cast(2, 'Int8')); +---- +Int8 + +# test decimal coercion +query BB +select + arrow_cast(1, 'Decimal128(10, 0)') == arrow_cast(1, 'Int64'), + arrow_cast(2, 'Int64') < arrow_cast(1, 'Decimal128(10, 0)') +; +---- +true false + +## decimal precision and scale +query TRTR +select + arrow_typeof(arrow_cast(1, 'Decimal128(10, 0)') + arrow_cast(1, 'Int64')), + arrow_cast(1, 'Decimal128(10, 0)') + arrow_cast(1, 'Int64'), + arrow_typeof(arrow_cast(1, 'Decimal128(10, 2)') + arrow_cast(0, 'Int64')), + arrow_cast(1, 'Decimal128(10, 2)') + arrow_cast(0, 'Int64') +; +---- +Decimal128(21, 0) 2 Decimal128(23, 2) 1 + +query B +select + arrow_cast(2, 'Int64') < arrow_cast(1, 'Decimal128(10, 0)') +; +---- +false + # test string_temporal_coercion query BBBBBBBBBB select @@ -1943,3 +2076,7 @@ select ; ---- true true true true true true true true true true + +# test coercion error +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Float32 \+ Utf8 to valid types 
+select arrow_cast(1.2, 'Float32') + arrow_cast('2.1', 'Utf8'); diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index b2491478d84e..2d4bcca06afa 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -2412,16 +2412,16 @@ EXPLAIN SELECT c5, c9, rn1 FROM (SELECT c5, c9, ---- logical_plan Limit: skip=0, fetch=5 ---Sort: rn1 ASC NULLS LAST, CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST, fetch=5 -----Sort: CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST +--Sort: rn1 ASC NULLS LAST, CAST(aggregate_test_100.c9 AS Int64) + CAST(aggregate_test_100.c5 AS Int64) DESC NULLS FIRST, fetch=5 +----Sort: CAST(aggregate_test_100.c9 AS Int64) + CAST(aggregate_test_100.c5 AS Int64) DESC NULLS FIRST ------Projection: aggregate_test_100.c5, aggregate_test_100.c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn1 ---------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [CAST(aggregate_test_100.c9 AS Int32) + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +--------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [CAST(aggregate_test_100.c9 AS Int64) + CAST(aggregate_test_100.c5 AS Int64) DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] ----------TableScan: aggregate_test_100 projection=[c5, c9] physical_plan GlobalLimitExec: skip=0, fetch=5 --ProjectionExec: expr=[c5@0 as c5, c9@1 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rn1] -----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted] -------SortExec: expr=[CAST(c9@1 AS Int32) + c5@0 DESC] +----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 + aggregate_test_100.c5 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted] +------SortExec: expr=[CAST(c9@1 AS Int64) + CAST(c5@0 AS Int64) DESC] --------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c5, c9], has_header=true # Ordering equivalence should be preserved during cast expression