@@ -221,10 +221,14 @@ pub enum ScalarValue {
221
221
UInt64 ( Option < u64 > ) ,
222
222
/// utf-8 encoded string.
223
223
Utf8 ( Option < String > ) ,
224
+ /// utf-8 encoded string whose arrow type is `Utf8View` (string view).
225
+ Utf8View ( Option < String > ) ,
224
226
/// utf-8 encoded string representing a LargeString's arrow type.
225
227
LargeUtf8 ( Option < String > ) ,
226
228
/// binary
227
229
Binary ( Option < Vec < u8 > > ) ,
230
+ /// binary data whose arrow type is `BinaryView` (binary view).
231
+ BinaryView ( Option < Vec < u8 > > ) ,
228
232
/// fixed size binary
229
233
FixedSizeBinary ( i32 , Option < Vec < u8 > > ) ,
230
234
/// large binary
@@ -345,10 +349,14 @@ impl PartialEq for ScalarValue {
345
349
( UInt64 ( _) , _) => false ,
346
350
( Utf8 ( v1) , Utf8 ( v2) ) => v1. eq ( v2) ,
347
351
( Utf8 ( _) , _) => false ,
352
+ ( Utf8View ( v1) , Utf8View ( v2) ) => v1. eq ( v2) ,
353
+ ( Utf8View ( _) , _) => false ,
348
354
( LargeUtf8 ( v1) , LargeUtf8 ( v2) ) => v1. eq ( v2) ,
349
355
( LargeUtf8 ( _) , _) => false ,
350
356
( Binary ( v1) , Binary ( v2) ) => v1. eq ( v2) ,
351
357
( Binary ( _) , _) => false ,
358
+ ( BinaryView ( v1) , BinaryView ( v2) ) => v1. eq ( v2) ,
359
+ ( BinaryView ( _) , _) => false ,
352
360
( FixedSizeBinary ( _, v1) , FixedSizeBinary ( _, v2) ) => v1. eq ( v2) ,
353
361
( FixedSizeBinary ( _, _) , _) => false ,
354
362
( LargeBinary ( v1) , LargeBinary ( v2) ) => v1. eq ( v2) ,
@@ -470,8 +478,12 @@ impl PartialOrd for ScalarValue {
470
478
( Utf8 ( _) , _) => None ,
471
479
( LargeUtf8 ( v1) , LargeUtf8 ( v2) ) => v1. partial_cmp ( v2) ,
472
480
( LargeUtf8 ( _) , _) => None ,
481
+ ( Utf8View ( v1) , Utf8View ( v2) ) => v1. partial_cmp ( v2) ,
482
+ ( Utf8View ( _) , _) => None ,
473
483
( Binary ( v1) , Binary ( v2) ) => v1. partial_cmp ( v2) ,
474
484
( Binary ( _) , _) => None ,
485
+ ( BinaryView ( v1) , BinaryView ( v2) ) => v1. partial_cmp ( v2) ,
486
+ ( BinaryView ( _) , _) => None ,
475
487
( FixedSizeBinary ( _, v1) , FixedSizeBinary ( _, v2) ) => v1. partial_cmp ( v2) ,
476
488
( FixedSizeBinary ( _, _) , _) => None ,
477
489
( LargeBinary ( v1) , LargeBinary ( v2) ) => v1. partial_cmp ( v2) ,
@@ -667,11 +679,10 @@ impl std::hash::Hash for ScalarValue {
667
679
UInt16 ( v) => v. hash ( state) ,
668
680
UInt32 ( v) => v. hash ( state) ,
669
681
UInt64 ( v) => v. hash ( state) ,
670
- Utf8 ( v) => v. hash ( state) ,
671
- LargeUtf8 ( v) => v. hash ( state) ,
672
- Binary ( v) => v. hash ( state) ,
673
- FixedSizeBinary ( _, v) => v. hash ( state) ,
674
- LargeBinary ( v) => v. hash ( state) ,
682
+ Utf8 ( v) | LargeUtf8 ( v) | Utf8View ( v) => v. hash ( state) ,
683
+ Binary ( v) | FixedSizeBinary ( _, v) | LargeBinary ( v) | BinaryView ( v) => {
684
+ v. hash ( state)
685
+ }
675
686
List ( arr) => {
676
687
hash_nested_array ( arr. to_owned ( ) as ArrayRef , state) ;
677
688
}
@@ -1107,7 +1118,9 @@ impl ScalarValue {
1107
1118
ScalarValue :: Float64 ( _) => DataType :: Float64 ,
1108
1119
ScalarValue :: Utf8 ( _) => DataType :: Utf8 ,
1109
1120
ScalarValue :: LargeUtf8 ( _) => DataType :: LargeUtf8 ,
1121
+ ScalarValue :: Utf8View ( _) => DataType :: Utf8View ,
1110
1122
ScalarValue :: Binary ( _) => DataType :: Binary ,
1123
+ ScalarValue :: BinaryView ( _) => DataType :: BinaryView ,
1111
1124
ScalarValue :: FixedSizeBinary ( sz, _) => DataType :: FixedSizeBinary ( * sz) ,
1112
1125
ScalarValue :: LargeBinary ( _) => DataType :: LargeBinary ,
1113
1126
ScalarValue :: List ( arr) => arr. data_type ( ) . to_owned ( ) ,
@@ -1310,11 +1323,13 @@ impl ScalarValue {
1310
1323
ScalarValue :: UInt16 ( v) => v. is_none ( ) ,
1311
1324
ScalarValue :: UInt32 ( v) => v. is_none ( ) ,
1312
1325
ScalarValue :: UInt64 ( v) => v. is_none ( ) ,
1313
- ScalarValue :: Utf8 ( v) => v. is_none ( ) ,
1314
- ScalarValue :: LargeUtf8 ( v) => v. is_none ( ) ,
1315
- ScalarValue :: Binary ( v) => v. is_none ( ) ,
1316
- ScalarValue :: FixedSizeBinary ( _, v) => v. is_none ( ) ,
1317
- ScalarValue :: LargeBinary ( v) => v. is_none ( ) ,
1326
+ ScalarValue :: Utf8 ( v)
1327
+ | ScalarValue :: Utf8View ( v)
1328
+ | ScalarValue :: LargeUtf8 ( v) => v. is_none ( ) ,
1329
+ ScalarValue :: Binary ( v)
1330
+ | ScalarValue :: BinaryView ( v)
1331
+ | ScalarValue :: FixedSizeBinary ( _, v)
1332
+ | ScalarValue :: LargeBinary ( v) => v. is_none ( ) ,
1318
1333
// arr.len() should be 1 for a list scalar, but we don't seem to
1319
1334
// enforce that anywhere, so we still check against array length.
1320
1335
ScalarValue :: List ( arr) => arr. len ( ) == arr. null_count ( ) ,
@@ -2002,6 +2017,12 @@ impl ScalarValue {
2002
2017
}
2003
2018
None => new_null_array ( & DataType :: Utf8 , size) ,
2004
2019
} ,
2020
+ ScalarValue :: Utf8View ( e) => match e {
2021
+ Some ( value) => {
2022
+ Arc :: new ( StringViewArray :: from_iter_values ( repeat ( value) . take ( size) ) )
2023
+ }
2024
+ None => new_null_array ( & DataType :: Utf8View , size) ,
2025
+ } ,
2005
2026
ScalarValue :: LargeUtf8 ( e) => match e {
2006
2027
Some ( value) => {
2007
2028
Arc :: new ( LargeStringArray :: from_iter_values ( repeat ( value) . take ( size) ) )
@@ -2018,6 +2039,16 @@ impl ScalarValue {
2018
2039
Arc :: new ( repeat ( None :: < & str > ) . take ( size) . collect :: < BinaryArray > ( ) )
2019
2040
}
2020
2041
} ,
2042
+ ScalarValue :: BinaryView ( e) => match e {
2043
+ Some ( value) => Arc :: new (
2044
+ repeat ( Some ( value. as_slice ( ) ) )
2045
+ . take ( size)
2046
+ . collect :: < BinaryViewArray > ( ) ,
2047
+ ) ,
2048
+ None => {
2049
+ Arc :: new ( repeat ( None :: < & str > ) . take ( size) . collect :: < BinaryViewArray > ( ) )
2050
+ }
2051
+ } ,
2021
2052
ScalarValue :: FixedSizeBinary ( s, e) => match e {
2022
2053
Some ( value) => Arc :: new (
2023
2054
FixedSizeBinaryArray :: try_from_sparse_iter_with_size (
@@ -2361,10 +2392,14 @@ impl ScalarValue {
2361
2392
DataType :: LargeBinary => {
2362
2393
typed_cast ! ( array, index, LargeBinaryArray , LargeBinary ) ?
2363
2394
}
2395
+ DataType :: BinaryView => {
2396
+ typed_cast ! ( array, index, BinaryViewArray , BinaryView ) ?
2397
+ }
2364
2398
DataType :: Utf8 => typed_cast ! ( array, index, StringArray , Utf8 ) ?,
2365
2399
DataType :: LargeUtf8 => {
2366
2400
typed_cast ! ( array, index, LargeStringArray , LargeUtf8 ) ?
2367
2401
}
2402
+ DataType :: Utf8View => typed_cast ! ( array, index, StringViewArray , Utf8View ) ?,
2368
2403
DataType :: List ( _) => {
2369
2404
let list_array = array. as_list :: < i32 > ( ) ;
2370
2405
let nested_array = list_array. value ( index) ;
@@ -2652,12 +2687,18 @@ impl ScalarValue {
2652
2687
ScalarValue :: Utf8 ( val) => {
2653
2688
eq_array_primitive ! ( array, index, StringArray , val) ?
2654
2689
}
2690
+ ScalarValue :: Utf8View ( val) => {
2691
+ eq_array_primitive ! ( array, index, StringViewArray , val) ?
2692
+ }
2655
2693
ScalarValue :: LargeUtf8 ( val) => {
2656
2694
eq_array_primitive ! ( array, index, LargeStringArray , val) ?
2657
2695
}
2658
2696
ScalarValue :: Binary ( val) => {
2659
2697
eq_array_primitive ! ( array, index, BinaryArray , val) ?
2660
2698
}
2699
+ ScalarValue :: BinaryView ( val) => {
2700
+ eq_array_primitive ! ( array, index, BinaryViewArray , val) ?
2701
+ }
2661
2702
ScalarValue :: FixedSizeBinary ( _, val) => {
2662
2703
eq_array_primitive ! ( array, index, FixedSizeBinaryArray , val) ?
2663
2704
}
@@ -2790,7 +2831,9 @@ impl ScalarValue {
2790
2831
| ScalarValue :: DurationMillisecond ( _)
2791
2832
| ScalarValue :: DurationMicrosecond ( _)
2792
2833
| ScalarValue :: DurationNanosecond ( _) => 0 ,
2793
- ScalarValue :: Utf8 ( s) | ScalarValue :: LargeUtf8 ( s) => {
2834
+ ScalarValue :: Utf8 ( s)
2835
+ | ScalarValue :: LargeUtf8 ( s)
2836
+ | ScalarValue :: Utf8View ( s) => {
2794
2837
s. as_ref ( ) . map ( |s| s. capacity ( ) ) . unwrap_or_default ( )
2795
2838
}
2796
2839
ScalarValue :: TimestampSecond ( _, s)
@@ -2801,7 +2844,8 @@ impl ScalarValue {
2801
2844
}
2802
2845
ScalarValue :: Binary ( b)
2803
2846
| ScalarValue :: FixedSizeBinary ( _, b)
2804
- | ScalarValue :: LargeBinary ( b) => {
2847
+ | ScalarValue :: LargeBinary ( b)
2848
+ | ScalarValue :: BinaryView ( b) => {
2805
2849
b. as_ref ( ) . map ( |b| b. capacity ( ) ) . unwrap_or_default ( )
2806
2850
}
2807
2851
ScalarValue :: List ( arr) => arr. get_array_memory_size ( ) ,
@@ -3068,7 +3112,9 @@ impl TryFrom<&DataType> for ScalarValue {
3068
3112
}
3069
3113
DataType :: Utf8 => ScalarValue :: Utf8 ( None ) ,
3070
3114
DataType :: LargeUtf8 => ScalarValue :: LargeUtf8 ( None ) ,
3115
+ DataType :: Utf8View => ScalarValue :: Utf8View ( None ) ,
3071
3116
DataType :: Binary => ScalarValue :: Binary ( None ) ,
3117
+ DataType :: BinaryView => ScalarValue :: BinaryView ( None ) ,
3072
3118
DataType :: FixedSizeBinary ( len) => ScalarValue :: FixedSizeBinary ( * len, None ) ,
3073
3119
DataType :: LargeBinary => ScalarValue :: LargeBinary ( None ) ,
3074
3120
DataType :: Date32 => ScalarValue :: Date32 ( None ) ,
@@ -3190,11 +3236,13 @@ impl fmt::Display for ScalarValue {
3190
3236
ScalarValue :: TimestampMillisecond ( e, _) => format_option ! ( f, e) ?,
3191
3237
ScalarValue :: TimestampMicrosecond ( e, _) => format_option ! ( f, e) ?,
3192
3238
ScalarValue :: TimestampNanosecond ( e, _) => format_option ! ( f, e) ?,
3193
- ScalarValue :: Utf8 ( e) => format_option ! ( f, e) ?,
3194
- ScalarValue :: LargeUtf8 ( e) => format_option ! ( f, e) ?,
3239
+ ScalarValue :: Utf8 ( e)
3240
+ | ScalarValue :: LargeUtf8 ( e)
3241
+ | ScalarValue :: Utf8View ( e) => format_option ! ( f, e) ?,
3195
3242
ScalarValue :: Binary ( e)
3196
3243
| ScalarValue :: FixedSizeBinary ( _, e)
3197
- | ScalarValue :: LargeBinary ( e) => match e {
3244
+ | ScalarValue :: LargeBinary ( e)
3245
+ | ScalarValue :: BinaryView ( e) => match e {
3198
3246
Some ( l) => write ! (
3199
3247
f,
3200
3248
"{}" ,
@@ -3318,10 +3366,14 @@ impl fmt::Debug for ScalarValue {
3318
3366
}
3319
3367
ScalarValue :: Utf8 ( None ) => write ! ( f, "Utf8({self})" ) ,
3320
3368
ScalarValue :: Utf8 ( Some ( _) ) => write ! ( f, "Utf8(\" {self}\" )" ) ,
3369
+ ScalarValue :: Utf8View ( None ) => write ! ( f, "Utf8View({self})" ) ,
3370
+ ScalarValue :: Utf8View ( Some ( _) ) => write ! ( f, "Utf8View(\" {self}\" )" ) ,
3321
3371
ScalarValue :: LargeUtf8 ( None ) => write ! ( f, "LargeUtf8({self})" ) ,
3322
3372
ScalarValue :: LargeUtf8 ( Some ( _) ) => write ! ( f, "LargeUtf8(\" {self}\" )" ) ,
3323
3373
ScalarValue :: Binary ( None ) => write ! ( f, "Binary({self})" ) ,
3324
3374
ScalarValue :: Binary ( Some ( _) ) => write ! ( f, "Binary(\" {self}\" )" ) ,
3375
+ ScalarValue :: BinaryView ( None ) => write ! ( f, "BinaryView({self})" ) ,
3376
+ ScalarValue :: BinaryView ( Some ( _) ) => write ! ( f, "BinaryView(\" {self}\" )" ) ,
3325
3377
ScalarValue :: FixedSizeBinary ( size, None ) => {
3326
3378
write ! ( f, "FixedSizeBinary({size}, {self})" )
3327
3379
}
@@ -5393,6 +5445,17 @@ mod tests {
5393
5445
ScalarValue :: Utf8 ( None ) ,
5394
5446
DataType :: Dictionary ( Box :: new ( DataType :: Int32 ) , Box :: new ( DataType :: Utf8 ) ) ,
5395
5447
) ;
5448
+
5449
+ // TODO: enable the checks below once https://github.com/apache/arrow-rs/issues/5893 is resolved
5450
+ /*
5451
+ check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
5452
+ check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
5453
+ check_scalar_cast(
5454
+ ScalarValue::from("larger than 12 bytes string"),
5455
+ DataType::Utf8View,
5456
+ );
5457
+
5458
+ */
5396
5459
}
5397
5460
5398
5461
// mimics how casting works on scalar values by `casting` `scalar` to `desired_type`
0 commit comments