18
18
//! implementation kernels for array functions
19
19
20
20
use arrow:: array:: {
21
- Array , ArrayRef , BooleanArray , Capacities , Date32Array , Float32Array , Float64Array ,
22
- GenericListArray , Int16Array , Int32Array , Int64Array , Int8Array , LargeListArray ,
23
- LargeStringArray , ListArray , ListBuilder , MutableArrayData , OffsetSizeTrait ,
24
- StringArray , StringBuilder , UInt16Array , UInt32Array , UInt64Array , UInt8Array ,
21
+ Array , ArrayRef , BooleanArray , Capacities , Date32Array , GenericListArray , Int64Array ,
22
+ LargeListArray , ListArray , MutableArrayData , OffsetSizeTrait , UInt64Array ,
25
23
} ;
26
24
use arrow:: compute;
27
25
use arrow:: datatypes:: {
@@ -33,335 +31,18 @@ use arrow_schema::FieldRef;
33
31
use arrow_schema:: SortOptions ;
34
32
35
33
use datafusion_common:: cast:: {
36
- as_date32_array, as_generic_list_array, as_generic_string_array, as_int64_array,
37
- as_interval_mdn_array, as_large_list_array, as_list_array, as_null_array,
38
- as_string_array,
34
+ as_date32_array, as_generic_list_array, as_int64_array, as_interval_mdn_array,
35
+ as_large_list_array, as_list_array, as_null_array, as_string_array,
39
36
} ;
40
37
use datafusion_common:: {
41
38
exec_err, internal_datafusion_err, not_impl_datafusion_err, DataFusionError , Result ,
42
39
ScalarValue ,
43
40
} ;
44
41
42
+ use crate :: utils:: downcast_arg;
45
43
use std:: any:: type_name;
46
44
use std:: sync:: Arc ;
47
45
48
- macro_rules! downcast_arg {
49
- ( $ARG: expr, $ARRAY_TYPE: ident) => { {
50
- $ARG. as_any( ) . downcast_ref:: <$ARRAY_TYPE>( ) . ok_or_else( || {
51
- DataFusionError :: Internal ( format!(
52
- "could not cast to {}" ,
53
- type_name:: <$ARRAY_TYPE>( )
54
- ) )
55
- } ) ?
56
- } } ;
57
- }
58
-
59
- macro_rules! to_string {
60
- ( $ARG: expr, $ARRAY: expr, $DELIMITER: expr, $NULL_STRING: expr, $WITH_NULL_STRING: expr, $ARRAY_TYPE: ident) => { {
61
- let arr = downcast_arg!( $ARRAY, $ARRAY_TYPE) ;
62
- for x in arr {
63
- match x {
64
- Some ( x) => {
65
- $ARG. push_str( & x. to_string( ) ) ;
66
- $ARG. push_str( $DELIMITER) ;
67
- }
68
- None => {
69
- if $WITH_NULL_STRING {
70
- $ARG. push_str( $NULL_STRING) ;
71
- $ARG. push_str( $DELIMITER) ;
72
- }
73
- }
74
- }
75
- }
76
- Ok ( $ARG)
77
- } } ;
78
- }
79
-
80
- macro_rules! call_array_function {
81
- ( $DATATYPE: expr, false ) => {
82
- match $DATATYPE {
83
- DataType :: Utf8 => array_function!( StringArray ) ,
84
- DataType :: LargeUtf8 => array_function!( LargeStringArray ) ,
85
- DataType :: Boolean => array_function!( BooleanArray ) ,
86
- DataType :: Float32 => array_function!( Float32Array ) ,
87
- DataType :: Float64 => array_function!( Float64Array ) ,
88
- DataType :: Int8 => array_function!( Int8Array ) ,
89
- DataType :: Int16 => array_function!( Int16Array ) ,
90
- DataType :: Int32 => array_function!( Int32Array ) ,
91
- DataType :: Int64 => array_function!( Int64Array ) ,
92
- DataType :: UInt8 => array_function!( UInt8Array ) ,
93
- DataType :: UInt16 => array_function!( UInt16Array ) ,
94
- DataType :: UInt32 => array_function!( UInt32Array ) ,
95
- DataType :: UInt64 => array_function!( UInt64Array ) ,
96
- _ => unreachable!( ) ,
97
- }
98
- } ;
99
- ( $DATATYPE: expr, $INCLUDE_LIST: expr) => { {
100
- match $DATATYPE {
101
- DataType :: List ( _) => array_function!( ListArray ) ,
102
- DataType :: Utf8 => array_function!( StringArray ) ,
103
- DataType :: LargeUtf8 => array_function!( LargeStringArray ) ,
104
- DataType :: Boolean => array_function!( BooleanArray ) ,
105
- DataType :: Float32 => array_function!( Float32Array ) ,
106
- DataType :: Float64 => array_function!( Float64Array ) ,
107
- DataType :: Int8 => array_function!( Int8Array ) ,
108
- DataType :: Int16 => array_function!( Int16Array ) ,
109
- DataType :: Int32 => array_function!( Int32Array ) ,
110
- DataType :: Int64 => array_function!( Int64Array ) ,
111
- DataType :: UInt8 => array_function!( UInt8Array ) ,
112
- DataType :: UInt16 => array_function!( UInt16Array ) ,
113
- DataType :: UInt32 => array_function!( UInt32Array ) ,
114
- DataType :: UInt64 => array_function!( UInt64Array ) ,
115
- _ => unreachable!( ) ,
116
- }
117
- } } ;
118
- }
119
-
120
- /// Array_to_string SQL function
121
- pub ( super ) fn array_to_string ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
122
- if args. len ( ) < 2 || args. len ( ) > 3 {
123
- return exec_err ! ( "array_to_string expects two or three arguments" ) ;
124
- }
125
-
126
- let arr = & args[ 0 ] ;
127
-
128
- let delimiters = as_string_array ( & args[ 1 ] ) ?;
129
- let delimiters: Vec < Option < & str > > = delimiters. iter ( ) . collect ( ) ;
130
-
131
- let mut null_string = String :: from ( "" ) ;
132
- let mut with_null_string = false ;
133
- if args. len ( ) == 3 {
134
- null_string = as_string_array ( & args[ 2 ] ) ?. value ( 0 ) . to_string ( ) ;
135
- with_null_string = true ;
136
- }
137
-
138
- fn compute_array_to_string (
139
- arg : & mut String ,
140
- arr : ArrayRef ,
141
- delimiter : String ,
142
- null_string : String ,
143
- with_null_string : bool ,
144
- ) -> datafusion_common:: Result < & mut String > {
145
- match arr. data_type ( ) {
146
- DataType :: List ( ..) => {
147
- let list_array = as_list_array ( & arr) ?;
148
- for i in 0 ..list_array. len ( ) {
149
- compute_array_to_string (
150
- arg,
151
- list_array. value ( i) ,
152
- delimiter. clone ( ) ,
153
- null_string. clone ( ) ,
154
- with_null_string,
155
- ) ?;
156
- }
157
-
158
- Ok ( arg)
159
- }
160
- DataType :: LargeList ( ..) => {
161
- let list_array = as_large_list_array ( & arr) ?;
162
- for i in 0 ..list_array. len ( ) {
163
- compute_array_to_string (
164
- arg,
165
- list_array. value ( i) ,
166
- delimiter. clone ( ) ,
167
- null_string. clone ( ) ,
168
- with_null_string,
169
- ) ?;
170
- }
171
-
172
- Ok ( arg)
173
- }
174
- DataType :: Null => Ok ( arg) ,
175
- data_type => {
176
- macro_rules! array_function {
177
- ( $ARRAY_TYPE: ident) => {
178
- to_string!(
179
- arg,
180
- arr,
181
- & delimiter,
182
- & null_string,
183
- with_null_string,
184
- $ARRAY_TYPE
185
- )
186
- } ;
187
- }
188
- call_array_function ! ( data_type, false )
189
- }
190
- }
191
- }
192
-
193
- fn generate_string_array < O : OffsetSizeTrait > (
194
- list_arr : & GenericListArray < O > ,
195
- delimiters : Vec < Option < & str > > ,
196
- null_string : String ,
197
- with_null_string : bool ,
198
- ) -> datafusion_common:: Result < StringArray > {
199
- let mut res: Vec < Option < String > > = Vec :: new ( ) ;
200
- for ( arr, & delimiter) in list_arr. iter ( ) . zip ( delimiters. iter ( ) ) {
201
- if let ( Some ( arr) , Some ( delimiter) ) = ( arr, delimiter) {
202
- let mut arg = String :: from ( "" ) ;
203
- let s = compute_array_to_string (
204
- & mut arg,
205
- arr,
206
- delimiter. to_string ( ) ,
207
- null_string. clone ( ) ,
208
- with_null_string,
209
- ) ?
210
- . clone ( ) ;
211
-
212
- if let Some ( s) = s. strip_suffix ( delimiter) {
213
- res. push ( Some ( s. to_string ( ) ) ) ;
214
- } else {
215
- res. push ( Some ( s) ) ;
216
- }
217
- } else {
218
- res. push ( None ) ;
219
- }
220
- }
221
-
222
- Ok ( StringArray :: from ( res) )
223
- }
224
-
225
- let arr_type = arr. data_type ( ) ;
226
- let string_arr = match arr_type {
227
- DataType :: List ( _) | DataType :: FixedSizeList ( _, _) => {
228
- let list_array = as_list_array ( & arr) ?;
229
- generate_string_array :: < i32 > (
230
- list_array,
231
- delimiters,
232
- null_string,
233
- with_null_string,
234
- ) ?
235
- }
236
- DataType :: LargeList ( _) => {
237
- let list_array = as_large_list_array ( & arr) ?;
238
- generate_string_array :: < i64 > (
239
- list_array,
240
- delimiters,
241
- null_string,
242
- with_null_string,
243
- ) ?
244
- }
245
- _ => {
246
- let mut arg = String :: from ( "" ) ;
247
- let mut res: Vec < Option < String > > = Vec :: new ( ) ;
248
- // delimiter length is 1
249
- assert_eq ! ( delimiters. len( ) , 1 ) ;
250
- let delimiter = delimiters[ 0 ] . unwrap ( ) ;
251
- let s = compute_array_to_string (
252
- & mut arg,
253
- arr. clone ( ) ,
254
- delimiter. to_string ( ) ,
255
- null_string,
256
- with_null_string,
257
- ) ?
258
- . clone ( ) ;
259
-
260
- if !s. is_empty ( ) {
261
- let s = s. strip_suffix ( delimiter) . unwrap ( ) . to_string ( ) ;
262
- res. push ( Some ( s) ) ;
263
- } else {
264
- res. push ( Some ( s) ) ;
265
- }
266
- StringArray :: from ( res)
267
- }
268
- } ;
269
-
270
- Ok ( Arc :: new ( string_arr) )
271
- }
272
-
273
- /// Splits string at occurrences of delimiter and returns an array of parts
274
- /// string_to_array('abc~@~def~@~ghi', '~@~') = '["abc", "def", "ghi"]'
275
- pub fn string_to_array < T : OffsetSizeTrait > ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
276
- if args. len ( ) < 2 || args. len ( ) > 3 {
277
- return exec_err ! ( "string_to_array expects two or three arguments" ) ;
278
- }
279
- let string_array = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
280
- let delimiter_array = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
281
-
282
- let mut list_builder = ListBuilder :: new ( StringBuilder :: with_capacity (
283
- string_array. len ( ) ,
284
- string_array. get_buffer_memory_size ( ) ,
285
- ) ) ;
286
-
287
- match args. len ( ) {
288
- 2 => {
289
- string_array. iter ( ) . zip ( delimiter_array. iter ( ) ) . for_each (
290
- |( string, delimiter) | {
291
- match ( string, delimiter) {
292
- ( Some ( string) , Some ( "" ) ) => {
293
- list_builder. values ( ) . append_value ( string) ;
294
- list_builder. append ( true ) ;
295
- }
296
- ( Some ( string) , Some ( delimiter) ) => {
297
- string. split ( delimiter) . for_each ( |s| {
298
- list_builder. values ( ) . append_value ( s) ;
299
- } ) ;
300
- list_builder. append ( true ) ;
301
- }
302
- ( Some ( string) , None ) => {
303
- string. chars ( ) . map ( |c| c. to_string ( ) ) . for_each ( |c| {
304
- list_builder. values ( ) . append_value ( c) ;
305
- } ) ;
306
- list_builder. append ( true ) ;
307
- }
308
- _ => list_builder. append ( false ) , // null value
309
- }
310
- } ,
311
- ) ;
312
- }
313
-
314
- 3 => {
315
- let null_value_array = as_generic_string_array :: < T > ( & args[ 2 ] ) ?;
316
- string_array
317
- . iter ( )
318
- . zip ( delimiter_array. iter ( ) )
319
- . zip ( null_value_array. iter ( ) )
320
- . for_each ( |( ( string, delimiter) , null_value) | {
321
- match ( string, delimiter) {
322
- ( Some ( string) , Some ( "" ) ) => {
323
- if Some ( string) == null_value {
324
- list_builder. values ( ) . append_null ( ) ;
325
- } else {
326
- list_builder. values ( ) . append_value ( string) ;
327
- }
328
- list_builder. append ( true ) ;
329
- }
330
- ( Some ( string) , Some ( delimiter) ) => {
331
- string. split ( delimiter) . for_each ( |s| {
332
- if Some ( s) == null_value {
333
- list_builder. values ( ) . append_null ( ) ;
334
- } else {
335
- list_builder. values ( ) . append_value ( s) ;
336
- }
337
- } ) ;
338
- list_builder. append ( true ) ;
339
- }
340
- ( Some ( string) , None ) => {
341
- string. chars ( ) . map ( |c| c. to_string ( ) ) . for_each ( |c| {
342
- if Some ( c. as_str ( ) ) == null_value {
343
- list_builder. values ( ) . append_null ( ) ;
344
- } else {
345
- list_builder. values ( ) . append_value ( c) ;
346
- }
347
- } ) ;
348
- list_builder. append ( true ) ;
349
- }
350
- _ => list_builder. append ( false ) , // null value
351
- }
352
- } ) ;
353
- }
354
- _ => {
355
- return exec_err ! (
356
- "Expect string_to_array function to take two or three parameters"
357
- )
358
- }
359
- }
360
-
361
- let list_array = list_builder. finish ( ) ;
362
- Ok ( Arc :: new ( list_array) as ArrayRef )
363
- }
364
-
365
46
/// Generates an array of integers from start to stop with a given step.
366
47
///
367
48
/// This function takes 1 to 3 ArrayRefs as arguments, representing start, stop, and step values.
0 commit comments