@@ -779,42 +779,66 @@ fn parse(
779
779
match key_type. as_ref ( ) {
780
780
DataType :: Int8 => Ok ( Arc :: new (
781
781
rows. iter ( )
782
- . map ( |row| row. get ( i) )
782
+ . map ( |row| {
783
+ let s = row. get ( i) ;
784
+ ( !null_regex. is_null ( s) ) . then_some ( s)
785
+ } )
783
786
. collect :: < DictionaryArray < Int8Type > > ( ) ,
784
787
) as ArrayRef ) ,
785
788
DataType :: Int16 => Ok ( Arc :: new (
786
789
rows. iter ( )
787
- . map ( |row| row. get ( i) )
790
+ . map ( |row| {
791
+ let s = row. get ( i) ;
792
+ ( !null_regex. is_null ( s) ) . then_some ( s)
793
+ } )
788
794
. collect :: < DictionaryArray < Int16Type > > ( ) ,
789
795
) as ArrayRef ) ,
790
796
DataType :: Int32 => Ok ( Arc :: new (
791
797
rows. iter ( )
792
- . map ( |row| row. get ( i) )
798
+ . map ( |row| {
799
+ let s = row. get ( i) ;
800
+ ( !null_regex. is_null ( s) ) . then_some ( s)
801
+ } )
793
802
. collect :: < DictionaryArray < Int32Type > > ( ) ,
794
803
) as ArrayRef ) ,
795
804
DataType :: Int64 => Ok ( Arc :: new (
796
805
rows. iter ( )
797
- . map ( |row| row. get ( i) )
806
+ . map ( |row| {
807
+ let s = row. get ( i) ;
808
+ ( !null_regex. is_null ( s) ) . then_some ( s)
809
+ } )
798
810
. collect :: < DictionaryArray < Int64Type > > ( ) ,
799
811
) as ArrayRef ) ,
800
812
DataType :: UInt8 => Ok ( Arc :: new (
801
813
rows. iter ( )
802
- . map ( |row| row. get ( i) )
814
+ . map ( |row| {
815
+ let s = row. get ( i) ;
816
+ ( !null_regex. is_null ( s) ) . then_some ( s)
817
+ } )
803
818
. collect :: < DictionaryArray < UInt8Type > > ( ) ,
804
819
) as ArrayRef ) ,
805
820
DataType :: UInt16 => Ok ( Arc :: new (
806
821
rows. iter ( )
807
- . map ( |row| row. get ( i) )
822
+ . map ( |row| {
823
+ let s = row. get ( i) ;
824
+ ( !null_regex. is_null ( s) ) . then_some ( s)
825
+ } )
808
826
. collect :: < DictionaryArray < UInt16Type > > ( ) ,
809
827
) as ArrayRef ) ,
810
828
DataType :: UInt32 => Ok ( Arc :: new (
811
829
rows. iter ( )
812
- . map ( |row| row. get ( i) )
830
+ . map ( |row| {
831
+ let s = row. get ( i) ;
832
+ ( !null_regex. is_null ( s) ) . then_some ( s)
833
+ } )
813
834
. collect :: < DictionaryArray < UInt32Type > > ( ) ,
814
835
) as ArrayRef ) ,
815
836
DataType :: UInt64 => Ok ( Arc :: new (
816
837
rows. iter ( )
817
- . map ( |row| row. get ( i) )
838
+ . map ( |row| {
839
+ let s = row. get ( i) ;
840
+ ( !null_regex. is_null ( s) ) . then_some ( s)
841
+ } )
818
842
. collect :: < DictionaryArray < UInt64Type > > ( ) ,
819
843
) as ArrayRef ) ,
820
844
_ => Err ( ArrowError :: ParseError ( format ! (
@@ -1475,6 +1499,40 @@ mod tests {
1475
1499
assert_eq ! ( strings. value( 29 ) , "Uckfield, East Sussex, UK" ) ;
1476
1500
}
1477
1501
1502
/// Reads `test/data/dictionary_nullable_test.csv` once per supported
/// dictionary key type and checks the validity of the nullable,
/// dictionary-encoded `name` column.
///
/// For every key type the first batch must contain 3 rows and 2 columns,
/// with the `name` value at row index 1 null and the one at row index 2
/// non-null.
#[test]
fn test_csv_with_nullable_dictionary() {
    // A fixed-size array is sufficient: the list is only iterated once, so a
    // heap-allocated `Vec` buys nothing.
    let key_types = [
        DataType::Int8,
        DataType::Int16,
        DataType::Int32,
        DataType::Int64,
        DataType::UInt8,
        DataType::UInt16,
        DataType::UInt32,
        DataType::UInt64,
    ];
    for key_type in key_types {
        let file = File::open("test/data/dictionary_nullable_test.csv").unwrap();
        let dictionary_type =
            DataType::Dictionary(Box::new(key_type), Box::new(DataType::Utf8));
        let schema = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Utf8, false),
            Field::new("name", dictionary_type.clone(), true),
        ]));

        // The handle is opened fresh on every iteration and never used again,
        // so it is moved into the reader instead of being duplicated.
        let mut csv = ReaderBuilder::new(schema).build(file).unwrap();

        let batch = csv.next().unwrap().unwrap();
        assert_eq!(
            3,
            batch.num_rows(),
            "unexpected row count for {:?}",
            dictionary_type
        );
        assert_eq!(
            2,
            batch.num_columns(),
            "unexpected column count for {:?}",
            dictionary_type
        );

        // Inspect validity through an array of the declared dictionary type.
        let names = arrow_cast::cast(batch.column(1), &dictionary_type).unwrap();
        assert!(
            !names.is_null(2),
            "row 2 of `name` should be valid for {:?}",
            dictionary_type
        );
        assert!(
            names.is_null(1),
            "row 1 of `name` should be null for {:?}",
            dictionary_type
        );
    }
}
1478
1536
#[ test]
1479
1537
fn test_nulls ( ) {
1480
1538
let schema = Arc :: new ( Schema :: new ( vec ! [
0 commit comments