Skip to content

Commit 22196b5

Browse files
committed
remove duplication
1 parent 070c134 commit 22196b5

File tree

1 file changed

+2
-140
lines changed

1 file changed

+2
-140
lines changed

datafusion/physical-expr-adapter/src/schema_rewriter.rs

Lines changed: 2 additions & 140 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@ use std::sync::Arc;
2222
use arrow::compute::can_cast_types;
2323
use arrow::datatypes::{DataType, FieldRef, Schema};
2424
use datafusion_common::{
25-
exec_err, plan_err,
25+
exec_err,
26+
nested_struct::validate_struct_compatibility,
2627
tree_node::{Transformed, TransformedResult, TreeNode},
2728
Result, ScalarValue,
2829
};
@@ -34,79 +35,6 @@ use datafusion_physical_expr::{
3435
};
3536
use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
3637

37-
/// Validates compatibility between source and target struct fields for casting operations.
38-
///
39-
/// This function implements comprehensive struct compatibility checking by examining:
40-
/// - Field name matching between source and target structs
41-
/// - Type castability for each matching field (including recursive struct validation)
42-
/// - Proper handling of missing fields (target fields not in source are allowed - filled with nulls)
43-
/// - Proper handling of extra fields (source fields not in target are allowed - ignored)
44-
///
45-
/// # Compatibility Rules
46-
/// - **Field Matching**: Fields are matched by name (case-sensitive)
47-
/// - **Missing Target Fields**: Allowed - will be filled with null values during casting
48-
/// - **Extra Source Fields**: Allowed - will be ignored during casting
49-
/// - **Type Compatibility**: Each matching field must be castable using Arrow's type system
50-
/// - **Nested Structs**: Recursively validates nested struct compatibility
51-
///
52-
/// # Arguments
53-
/// * `source_fields` - Fields from the source struct type
54-
/// * `target_fields` - Fields from the target struct type
55-
///
56-
/// # Returns
57-
/// * `Ok(true)` if the structs are compatible for casting
58-
/// * `Err(DataFusionError)` with detailed error message if incompatible
59-
///
60-
/// # Examples
61-
/// ```text
62-
/// // Compatible: source has extra field, target has missing field
63-
/// // Source: {a: i32, b: string, c: f64}
64-
/// // Target: {a: i64, d: bool}
65-
/// // Result: Ok(true) - 'a' can cast i32->i64, 'b','c' ignored, 'd' filled with nulls
66-
///
67-
/// // Incompatible: matching field has incompatible types
68-
/// // Source: {a: string}
69-
/// // Target: {a: binary}
70-
/// // Result: Err(...) - string cannot cast to binary
71-
/// ```
72-
pub fn validate_struct_compatibility(
73-
source_fields: &[FieldRef],
74-
target_fields: &[FieldRef],
75-
) -> Result<bool> {
76-
// Check compatibility for each target field
77-
for target_field in target_fields {
78-
// Look for matching field in source by name
79-
if let Some(source_field) = source_fields
80-
.iter()
81-
.find(|f| f.name() == target_field.name())
82-
{
83-
// Check if the matching field types are compatible
84-
match (source_field.data_type(), target_field.data_type()) {
85-
// Recursively validate nested structs
86-
(DataType::Struct(source_nested), DataType::Struct(target_nested)) => {
87-
validate_struct_compatibility(source_nested, target_nested)?;
88-
}
89-
// For non-struct types, use the existing castability check
90-
_ => {
91-
if !can_cast_types(source_field.data_type(), target_field.data_type())
92-
{
93-
return plan_err!(
94-
"Cannot cast struct field '{}' from type {:?} to type {:?}",
95-
target_field.name(),
96-
source_field.data_type(),
97-
target_field.data_type()
98-
);
99-
}
100-
}
101-
}
102-
}
103-
// Missing fields in source are OK - they'll be filled with nulls
104-
}
105-
106-
// Extra fields in source are OK - they'll be ignored
107-
Ok(true)
108-
}
109-
11038
/// Build a struct expression by recursively extracting and rewriting fields from a source struct.
11139
///
11240
/// This function creates a new struct expression by:
@@ -426,72 +354,6 @@ mod tests {
426354
insta::assert_snapshot!(expression_to_sql(&result), @"CAST(a AS Int64)");
427355
}
428356

429-
/// A castable shared field plus one unmatched field on each side must validate.
#[test]
fn test_validate_struct_compatibility_compatible() -> Result<()> {
    // Every field in this test is nullable, so only name and type vary.
    let nullable = |name: &str, data_type: DataType| Arc::new(Field::new(name, data_type, true));

    // Source struct: {a: Int32, b: Utf8}
    let source_fields = vec![
        nullable("a", DataType::Int32),
        nullable("b", DataType::Utf8),
    ];

    // Target struct: {a: Int64, c: Float64}
    // 'a' widens Int32 -> Int64; 'c' has no source counterpart, which is allowed.
    let target_fields = vec![
        nullable("a", DataType::Int64),
        nullable("c", DataType::Float64),
    ];

    assert!(validate_struct_compatibility(&source_fields, &target_fields)?);
    Ok(())
}
447-
448-
/// A shared field whose types cannot be cast must produce a descriptive error.
#[test]
fn test_validate_struct_compatibility_incompatible() {
    // Source struct: {a: Binary}
    let source_fields = vec![Arc::new(Field::new("a", DataType::Binary, true))];

    // Target struct: {a: Int32} - Binary cannot be cast to Int32
    let target_fields = vec![Arc::new(Field::new("a", DataType::Int32, true))];

    let error_msg = validate_struct_compatibility(&source_fields, &target_fields)
        .expect_err("Binary -> Int32 must be rejected")
        .to_string();

    // The message has to name the offending field and both data types.
    for expected_fragment in ["Cannot cast struct field 'a'", "Binary", "Int32"] {
        assert!(error_msg.contains(expected_fragment));
    }
}
463-
464-
/// Validation must recurse into struct-typed fields and apply the same rules there.
#[test]
fn test_validate_struct_compatibility_nested_structs() -> Result<()> {
    // All fields in this test are nullable.
    let leaf = |name: &str, data_type: DataType| Arc::new(Field::new(name, data_type, true));
    // Wraps a list of children into a single struct-typed field called "info".
    let info_struct = |children: Vec<Arc<Field>>| {
        vec![Arc::new(Field::new(
            "info",
            DataType::Struct(children.into()),
            true,
        ))]
    };

    // Source: {info: {name: Utf8, age: Int32}}
    let source_fields = info_struct(vec![
        leaf("name", DataType::Utf8),
        leaf("age", DataType::Int32),
    ]);

    // Target: {info: {name: Utf8, age: Int64, location: Utf8}}
    // 'age' widens Int32 -> Int64; 'location' is absent from the source, which is allowed.
    let target_fields = info_struct(vec![
        leaf("name", DataType::Utf8),
        leaf("age", DataType::Int64),
        leaf("location", DataType::Utf8),
    ]);

    assert!(validate_struct_compatibility(&source_fields, &target_fields)?);
    Ok(())
}
494-
495357
#[test]
496358
fn test_rewrite_struct_column_compatibility() -> Result<()> {
497359
// Test that struct compatibility validation is used in schema rewriting

0 commit comments

Comments
 (0)