@@ -44,6 +44,8 @@ use crate::{
44
44
TableProviderFilterPushDown , TableSource , WriteOp ,
45
45
} ;
46
46
47
+ use super :: dml:: InsertOp ;
48
+ use super :: plan:: ColumnUnnestList ;
47
49
use arrow:: datatypes:: { DataType , Field , Fields , Schema , SchemaRef } ;
48
50
use datafusion_common:: display:: ToStringifiedPlan ;
49
51
use datafusion_common:: file_options:: file_type:: FileType ;
@@ -54,9 +56,6 @@ use datafusion_common::{
54
56
} ;
55
57
use datafusion_expr_common:: type_coercion:: binary:: type_union_resolution;
56
58
57
- use super :: dml:: InsertOp ;
58
- use super :: plan:: { ColumnUnnestList , ColumnUnnestType } ;
59
-
60
59
/// Default table name for unnamed table
61
60
pub const UNNAMED_TABLE : & str = "?table?" ;
62
61
@@ -1186,7 +1185,7 @@ impl LogicalPlanBuilder {
1186
1185
) -> Result < Self > {
1187
1186
unnest_with_options (
1188
1187
Arc :: unwrap_or_clone ( self . plan ) ,
1189
- vec ! [ ( column. into( ) , ColumnUnnestType :: Inferred ) ] ,
1188
+ vec ! [ column. into( ) ] ,
1190
1189
options,
1191
1190
)
1192
1191
. map ( Self :: new)
@@ -1197,26 +1196,6 @@ impl LogicalPlanBuilder {
1197
1196
self ,
1198
1197
columns : Vec < Column > ,
1199
1198
options : UnnestOptions ,
1200
- ) -> Result < Self > {
1201
- unnest_with_options (
1202
- Arc :: unwrap_or_clone ( self . plan ) ,
1203
- columns
1204
- . into_iter ( )
1205
- . map ( |c| ( c, ColumnUnnestType :: Inferred ) )
1206
- . collect ( ) ,
1207
- options,
1208
- )
1209
- . map ( Self :: new)
1210
- }
1211
-
1212
- /// Unnest the given columns with the given [`UnnestOptions`]
1213
- /// if one column is a list type, it can be recursively and simultaneously
1214
- /// unnested into the desired recursion levels
1215
- /// e.g select unnest(list_col,depth=1), unnest(list_col,depth=2)
1216
- pub fn unnest_columns_recursive_with_options (
1217
- self ,
1218
- columns : Vec < ( Column , ColumnUnnestType ) > ,
1219
- options : UnnestOptions ,
1220
1199
) -> Result < Self > {
1221
1200
unnest_with_options ( Arc :: unwrap_or_clone ( self . plan ) , columns, options)
1222
1201
. map ( Self :: new)
@@ -1594,14 +1573,12 @@ impl TableSource for LogicalTableSource {
1594
1573
1595
1574
/// Create a [`LogicalPlan::Unnest`] plan
1596
1575
pub fn unnest ( input : LogicalPlan , columns : Vec < Column > ) -> Result < LogicalPlan > {
1597
- let unnestings = columns
1598
- . into_iter ( )
1599
- . map ( |c| ( c, ColumnUnnestType :: Inferred ) )
1600
- . collect ( ) ;
1601
- unnest_with_options ( input, unnestings, UnnestOptions :: default ( ) )
1576
+ unnest_with_options ( input, columns, UnnestOptions :: default ( ) )
1602
1577
}
1603
1578
1604
- pub fn get_unnested_list_datatype_recursive (
1579
+ // Get the data type of a multi-dimensional type after unnesting it
1580
+ // with a given depth
1581
+ fn get_unnested_list_datatype_recursive (
1605
1582
data_type : & DataType ,
1606
1583
depth : usize ,
1607
1584
) -> Result < DataType > {
@@ -1620,27 +1597,6 @@ pub fn get_unnested_list_datatype_recursive(
1620
1597
internal_err ! ( "trying to unnest on invalid data type {:?}" , data_type)
1621
1598
}
1622
1599
1623
- /// Infer the unnest type based on the data type:
1624
- /// - list type: infer to unnest(list(col, depth=1))
1625
- /// - struct type: infer to unnest(struct)
1626
- fn infer_unnest_type (
1627
- col_name : & String ,
1628
- data_type : & DataType ,
1629
- ) -> Result < ColumnUnnestType > {
1630
- match data_type {
1631
- DataType :: List ( _) | DataType :: FixedSizeList ( _, _) | DataType :: LargeList ( _) => {
1632
- Ok ( ColumnUnnestType :: List ( vec ! [ ColumnUnnestList {
1633
- output_column: Column :: from_name( col_name) ,
1634
- depth: 1 ,
1635
- } ] ) )
1636
- }
1637
- DataType :: Struct ( _) => Ok ( ColumnUnnestType :: Struct ) ,
1638
- _ => {
1639
- internal_err ! ( "trying to unnest on invalid data type {:?}" , data_type)
1640
- }
1641
- }
1642
- }
1643
-
1644
1600
pub fn get_struct_unnested_columns (
1645
1601
col_name : & String ,
1646
1602
inner_fields : & Fields ,
@@ -1729,20 +1685,15 @@ pub fn get_unnested_columns(
1729
1685
/// ```
1730
1686
pub fn unnest_with_options (
1731
1687
input : LogicalPlan ,
1732
- columns_to_unnest : Vec < ( Column , ColumnUnnestType ) > ,
1688
+ columns_to_unnest : Vec < Column > ,
1733
1689
options : UnnestOptions ,
1734
1690
) -> Result < LogicalPlan > {
1735
1691
let mut list_columns: Vec < ( usize , ColumnUnnestList ) > = vec ! [ ] ;
1736
1692
let mut struct_columns = vec ! [ ] ;
1737
1693
let indices_to_unnest = columns_to_unnest
1738
1694
. iter ( )
1739
- . map ( |col_unnesting| {
1740
- Ok ( (
1741
- input. schema ( ) . index_of_column ( & col_unnesting. 0 ) ?,
1742
- col_unnesting,
1743
- ) )
1744
- } )
1745
- . collect :: < Result < HashMap < usize , & ( Column , ColumnUnnestType ) > > > ( ) ?;
1695
+ . map ( |c| Ok ( ( input. schema ( ) . index_of_column ( c) ?, c) ) )
1696
+ . collect :: < Result < HashMap < usize , & Column > > > ( ) ?;
1746
1697
1747
1698
let input_schema = input. schema ( ) ;
1748
1699
@@ -1767,51 +1718,59 @@ pub fn unnest_with_options(
1767
1718
. enumerate ( )
1768
1719
. map ( |( index, ( original_qualifier, original_field) ) | {
1769
1720
match indices_to_unnest. get ( & index) {
1770
- Some ( ( column_to_unnest, unnest_type) ) => {
1771
- let mut inferred_unnest_type = unnest_type. clone ( ) ;
1772
- if let ColumnUnnestType :: Inferred = unnest_type {
1773
- inferred_unnest_type = infer_unnest_type (
1721
+ Some ( column_to_unnest) => {
1722
+ let recursions_on_column = options
1723
+ . recursions
1724
+ . iter ( )
1725
+ . filter ( |p| -> bool { & p. input_column == * column_to_unnest } )
1726
+ . collect :: < Vec < _ > > ( ) ;
1727
+ let mut transformed_columns = recursions_on_column
1728
+ . iter ( )
1729
+ . map ( |r| {
1730
+ list_columns. push ( (
1731
+ index,
1732
+ ColumnUnnestList {
1733
+ output_column : r. output_column . clone ( ) ,
1734
+ depth : r. depth ,
1735
+ } ,
1736
+ ) ) ;
1737
+ Ok ( get_unnested_columns (
1738
+ & r. output_column . name ,
1739
+ original_field. data_type ( ) ,
1740
+ r. depth ,
1741
+ ) ?
1742
+ . into_iter ( )
1743
+ . next ( )
1744
+ . unwrap ( ) ) // because unnesting a list column always result into one result
1745
+ } )
1746
+ . collect :: < Result < Vec < ( Column , Arc < Field > ) > > > ( ) ?;
1747
+ if transformed_columns. is_empty ( ) {
1748
+ transformed_columns = get_unnested_columns (
1774
1749
& column_to_unnest. name ,
1775
1750
original_field. data_type ( ) ,
1751
+ 1 ,
1776
1752
) ?;
1777
- }
1778
- let transformed_columns: Vec < ( Column , Arc < Field > ) > =
1779
- match inferred_unnest_type {
1780
- ColumnUnnestType :: Struct => {
1753
+ match original_field. data_type ( ) {
1754
+ DataType :: Struct ( _) => {
1781
1755
struct_columns. push ( index) ;
1782
- get_unnested_columns (
1783
- & column_to_unnest. name ,
1784
- original_field. data_type ( ) ,
1785
- 1 ,
1786
- ) ?
1787
1756
}
1788
- ColumnUnnestType :: List ( unnest_lists) => {
1789
- list_columns. extend (
1790
- unnest_lists
1791
- . iter ( )
1792
- . map ( |ul| ( index, ul. to_owned ( ) . clone ( ) ) ) ,
1793
- ) ;
1794
- unnest_lists
1795
- . iter ( )
1796
- . map (
1797
- |ColumnUnnestList {
1798
- output_column,
1799
- depth,
1800
- } | {
1801
- get_unnested_columns (
1802
- & output_column. name ,
1803
- original_field. data_type ( ) ,
1804
- * depth,
1805
- )
1806
- } ,
1807
- )
1808
- . collect :: < Result < Vec < Vec < ( Column , Arc < Field > ) > > > > ( ) ?
1809
- . into_iter ( )
1810
- . flatten ( )
1811
- . collect :: < Vec < _ > > ( )
1757
+ DataType :: List ( _)
1758
+ | DataType :: FixedSizeList ( _, _)
1759
+ | DataType :: LargeList ( _) => {
1760
+ list_columns. push ( (
1761
+ index,
1762
+ ColumnUnnestList {
1763
+ output_column : Column :: from_name (
1764
+ & column_to_unnest. name ,
1765
+ ) ,
1766
+ depth : 1 ,
1767
+ } ,
1768
+ ) ) ;
1812
1769
}
1813
- _ => return internal_err ! ( "Invalid unnest type" ) ,
1770
+ _ => { }
1814
1771
} ;
1772
+ }
1773
+
1815
1774
// new columns dependent on the same original index
1816
1775
dependency_indices
1817
1776
. extend ( std:: iter:: repeat ( index) . take ( transformed_columns. len ( ) ) ) ;
@@ -1860,7 +1819,7 @@ mod tests {
1860
1819
use crate :: logical_plan:: StringifiedPlan ;
1861
1820
use crate :: { col, expr, expr_fn:: exists, in_subquery, lit, scalar_subquery} ;
1862
1821
1863
- use datafusion_common:: SchemaError ;
1822
+ use datafusion_common:: { RecursionUnnestOption , SchemaError } ;
1864
1823
1865
1824
#[ test]
1866
1825
fn plan_builder_simple ( ) -> Result < ( ) > {
@@ -2268,24 +2227,19 @@ mod tests {
2268
2227
2269
2228
// Simultaneously unnesting a list (with different depth) and a struct column
2270
2229
let plan = nested_table_scan ( "test_table" ) ?
2271
- . unnest_columns_recursive_with_options (
2272
- vec ! [
2273
- (
2274
- "stringss" . into( ) ,
2275
- ColumnUnnestType :: List ( vec![
2276
- ColumnUnnestList {
2277
- output_column: Column :: from_name( "stringss_depth_1" ) ,
2278
- depth: 1 ,
2279
- } ,
2280
- ColumnUnnestList {
2281
- output_column: Column :: from_name( "stringss_depth_2" ) ,
2282
- depth: 2 ,
2283
- } ,
2284
- ] ) ,
2285
- ) ,
2286
- ( "struct_singular" . into( ) , ColumnUnnestType :: Inferred ) ,
2287
- ] ,
2288
- UnnestOptions :: default ( ) ,
2230
+ . unnest_columns_with_options (
2231
+ vec ! [ "stringss" . into( ) , "struct_singular" . into( ) ] ,
2232
+ UnnestOptions :: default ( )
2233
+ . with_recursions ( RecursionUnnestOption {
2234
+ input_column : "stringss" . into ( ) ,
2235
+ output_column : "stringss_depth_1" . into ( ) ,
2236
+ depth : 1 ,
2237
+ } )
2238
+ . with_recursions ( RecursionUnnestOption {
2239
+ input_column : "stringss" . into ( ) ,
2240
+ output_column : "stringss_depth_2" . into ( ) ,
2241
+ depth : 2 ,
2242
+ } ) ,
2289
2243
) ?
2290
2244
. build ( ) ?;
2291
2245
0 commit comments