@@ -22,7 +22,8 @@ use std::sync::Arc;
22
22
use arrow:: compute:: can_cast_types;
23
23
use arrow:: datatypes:: { DataType , FieldRef , Schema } ;
24
24
use datafusion_common:: {
25
- exec_err, plan_err,
25
+ exec_err,
26
+ nested_struct:: validate_struct_compatibility,
26
27
tree_node:: { Transformed , TransformedResult , TreeNode } ,
27
28
Result , ScalarValue ,
28
29
} ;
@@ -34,79 +35,6 @@ use datafusion_physical_expr::{
34
35
} ;
35
36
use datafusion_physical_expr_common:: physical_expr:: PhysicalExpr ;
36
37
37
- /// Validates compatibility between source and target struct fields for casting operations.
38
- ///
39
- /// This function implements comprehensive struct compatibility checking by examining:
40
- /// - Field name matching between source and target structs
41
- /// - Type castability for each matching field (including recursive struct validation)
42
- /// - Proper handling of missing fields (target fields not in source are allowed - filled with nulls)
43
- /// - Proper handling of extra fields (source fields not in target are allowed - ignored)
44
- ///
45
- /// # Compatibility Rules
46
- /// - **Field Matching**: Fields are matched by name (case-sensitive)
47
- /// - **Missing Target Fields**: Allowed - will be filled with null values during casting
48
- /// - **Extra Source Fields**: Allowed - will be ignored during casting
49
- /// - **Type Compatibility**: Each matching field must be castable using Arrow's type system
50
- /// - **Nested Structs**: Recursively validates nested struct compatibility
51
- ///
52
- /// # Arguments
53
- /// * `source_fields` - Fields from the source struct type
54
- /// * `target_fields` - Fields from the target struct type
55
- ///
56
- /// # Returns
57
- /// * `Ok(true)` if the structs are compatible for casting
58
- /// * `Err(DataFusionError)` with detailed error message if incompatible
59
- ///
60
- /// # Examples
61
- /// ```text
62
- /// // Compatible: source has extra field, target has missing field
63
- /// // Source: {a: i32, b: string, c: f64}
64
- /// // Target: {a: i64, d: bool}
65
- /// // Result: Ok(true) - 'a' can cast i32->i64, 'b','c' ignored, 'd' filled with nulls
66
- ///
67
- /// // Incompatible: matching field has incompatible types
68
- /// // Source: {a: string}
69
- /// // Target: {a: binary}
70
- /// // Result: Err(...) - string cannot cast to binary
71
- /// ```
72
- pub fn validate_struct_compatibility (
73
- source_fields : & [ FieldRef ] ,
74
- target_fields : & [ FieldRef ] ,
75
- ) -> Result < bool > {
76
- // Check compatibility for each target field
77
- for target_field in target_fields {
78
- // Look for matching field in source by name
79
- if let Some ( source_field) = source_fields
80
- . iter ( )
81
- . find ( |f| f. name ( ) == target_field. name ( ) )
82
- {
83
- // Check if the matching field types are compatible
84
- match ( source_field. data_type ( ) , target_field. data_type ( ) ) {
85
- // Recursively validate nested structs
86
- ( DataType :: Struct ( source_nested) , DataType :: Struct ( target_nested) ) => {
87
- validate_struct_compatibility ( source_nested, target_nested) ?;
88
- }
89
- // For non-struct types, use the existing castability check
90
- _ => {
91
- if !can_cast_types ( source_field. data_type ( ) , target_field. data_type ( ) )
92
- {
93
- return plan_err ! (
94
- "Cannot cast struct field '{}' from type {:?} to type {:?}" ,
95
- target_field. name( ) ,
96
- source_field. data_type( ) ,
97
- target_field. data_type( )
98
- ) ;
99
- }
100
- }
101
- }
102
- }
103
- // Missing fields in source are OK - they'll be filled with nulls
104
- }
105
-
106
- // Extra fields in source are OK - they'll be ignored
107
- Ok ( true )
108
- }
109
-
110
38
/// Build a struct expression by recursively extracting and rewriting fields from a source struct.
111
39
///
112
40
/// This function creates a new struct expression by:
@@ -426,72 +354,6 @@ mod tests {
426
354
insta:: assert_snapshot!( expression_to_sql( & result) , @"CAST(a AS Int64)" ) ;
427
355
}
428
356
429
/// A castable shared field plus a target-only field must validate successfully.
#[test]
fn test_validate_struct_compatibility_compatible() -> Result<()> {
    // All fields in this test are nullable.
    let nullable =
        |name: &str, data_type: DataType| Arc::new(Field::new(name, data_type, true));

    // Source: {a: Int32, b: Utf8}
    let source_fields = [
        nullable("a", DataType::Int32),
        nullable("b", DataType::Utf8),
    ];

    // Target: {a: Int64, c: Float64}
    // 'a' widens Int32 -> Int64, 'c' has no source counterpart, 'b' is unused.
    let target_fields = [
        nullable("a", DataType::Int64),
        nullable("c", DataType::Float64),
    ];

    assert!(validate_struct_compatibility(&source_fields, &target_fields)?);
    Ok(())
}
447
-
448
/// A same-named field whose types cannot be cast must produce a descriptive error.
#[test]
fn test_validate_struct_compatibility_incompatible() {
    // Source: {a: Binary}, Target: {a: Int32} - Binary cannot be cast to Int32.
    let source_fields = [Arc::new(Field::new("a", DataType::Binary, true))];
    let target_fields = [Arc::new(Field::new("a", DataType::Int32, true))];

    let error_msg = validate_struct_compatibility(&source_fields, &target_fields)
        .expect_err("Binary -> Int32 should be rejected")
        .to_string();

    // The message must name the field and both types involved.
    for expected in &["Cannot cast struct field 'a'", "Binary", "Int32"] {
        assert!(error_msg.contains(*expected));
    }
}
463
-
464
/// Validation must recurse into nested structs and apply the same rules there.
#[test]
fn test_validate_struct_compatibility_nested_structs() -> Result<()> {
    // All fields in this test are nullable.
    let nullable =
        |name: &str, data_type: DataType| Arc::new(Field::new(name, data_type, true));

    // Source: {info: {name: Utf8, age: Int32}}
    let source_info = DataType::Struct(
        vec![
            nullable("name", DataType::Utf8),
            nullable("age", DataType::Int32),
        ]
        .into(),
    );
    let source_fields = [nullable("info", source_info)];

    // Target: {info: {name: Utf8, age: Int64, location: Utf8}}
    // 'age' widens Int32 -> Int64; 'location' has no source counterpart.
    let target_info = DataType::Struct(
        vec![
            nullable("name", DataType::Utf8),
            nullable("age", DataType::Int64),
            nullable("location", DataType::Utf8),
        ]
        .into(),
    );
    let target_fields = [nullable("info", target_info)];

    assert!(validate_struct_compatibility(&source_fields, &target_fields)?);
    Ok(())
}
494
-
495
357
#[ test]
496
358
fn test_rewrite_struct_column_compatibility ( ) -> Result < ( ) > {
497
359
// Test that struct compatibility validation is used in schema rewriting
0 commit comments