@@ -548,6 +548,89 @@ pub fn ilike_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
548
548
Ok ( BooleanArray :: from ( data) )
549
549
}
550
550
551
+ /// Perform SQL `left NOT ILIKE right` operation on [`StringArray`] /
552
+ /// [`LargeStringArray`].
553
+ ///
554
+ /// See the documentation on [`like_utf8`] for more details.
555
+ pub fn nilike_utf8 < OffsetSize : StringOffsetSizeTrait > (
556
+ left : & GenericStringArray < OffsetSize > ,
557
+ right : & GenericStringArray < OffsetSize > ,
558
+ ) -> Result < BooleanArray > {
559
+ regex_like ( left, right, true , |re_pattern| {
560
+ Regex :: new ( & format ! ( "(?i)^{}$" , re_pattern) ) . map_err ( |e| {
561
+ ArrowError :: ComputeError ( format ! (
562
+ "Unable to build regex from ILIKE pattern: {}" ,
563
+ e
564
+ ) )
565
+ } )
566
+ } )
567
+ }
568
+
569
+ /// Perform SQL `left NOT ILIKE right` operation on [`StringArray`] /
570
+ /// [`LargeStringArray`] and a scalar.
571
+ ///
572
+ /// See the documentation on [`like_utf8`] for more details.
573
+ pub fn nilike_utf8_scalar < OffsetSize : StringOffsetSizeTrait > (
574
+ left : & GenericStringArray < OffsetSize > ,
575
+ right : & str ,
576
+ ) -> Result < BooleanArray > {
577
+ let null_bit_buffer = left. data ( ) . null_buffer ( ) . cloned ( ) ;
578
+ let mut result = BooleanBufferBuilder :: new ( left. len ( ) ) ;
579
+
580
+ if !right. contains ( is_like_pattern) {
581
+ // fast path, can use equals
582
+ for i in 0 ..left. len ( ) {
583
+ result. append ( left. value ( i) != right) ;
584
+ }
585
+ } else if right. ends_with ( '%' ) && !right[ ..right. len ( ) - 1 ] . contains ( is_like_pattern)
586
+ {
587
+ // fast path, can use ends_with
588
+ for i in 0 ..left. len ( ) {
589
+ result. append (
590
+ !left
591
+ . value ( i)
592
+ . to_uppercase ( )
593
+ . starts_with ( & right[ ..right. len ( ) - 1 ] . to_uppercase ( ) ) ,
594
+ ) ;
595
+ }
596
+ } else if right. starts_with ( '%' ) && !right[ 1 ..] . contains ( is_like_pattern) {
597
+ // fast path, can use starts_with
598
+ for i in 0 ..left. len ( ) {
599
+ result. append (
600
+ !left
601
+ . value ( i)
602
+ . to_uppercase ( )
603
+ . ends_with ( & right[ 1 ..] . to_uppercase ( ) ) ,
604
+ ) ;
605
+ }
606
+ } else {
607
+ let re_pattern = escape ( right) . replace ( '%' , ".*" ) . replace ( '_' , "." ) ;
608
+ let re = Regex :: new ( & format ! ( "(?i)^{}$" , re_pattern) ) . map_err ( |e| {
609
+ ArrowError :: ComputeError ( format ! (
610
+ "Unable to build regex from ILIKE pattern: {}" ,
611
+ e
612
+ ) )
613
+ } ) ?;
614
+ for i in 0 ..left. len ( ) {
615
+ let haystack = left. value ( i) ;
616
+ result. append ( !re. is_match ( haystack) ) ;
617
+ }
618
+ }
619
+
620
+ let data = unsafe {
621
+ ArrayData :: new_unchecked (
622
+ DataType :: Boolean ,
623
+ left. len ( ) ,
624
+ None ,
625
+ null_bit_buffer,
626
+ 0 ,
627
+ vec ! [ result. finish( ) ] ,
628
+ vec ! [ ] ,
629
+ )
630
+ } ;
631
+ Ok ( BooleanArray :: from ( data) )
632
+ }
633
+
551
634
/// Perform SQL `array ~ regex_array` operation on [`StringArray`] / [`LargeStringArray`].
552
635
/// If `regex_array` element has an empty value, the corresponding result value is always true.
553
636
///
@@ -3983,6 +4066,60 @@ mod tests {
3983
4066
vec![ false , true , false , false ]
3984
4067
) ;
3985
4068
4069
+ test_utf8 ! (
4070
+ test_utf8_array_nilike,
4071
+ vec![ "arrow" , "arrow" , "ARROW" , "arrow" , "ARROW" , "ARROWS" , "arROw" ] ,
4072
+ vec![ "arrow" , "ar%" , "%ro%" , "foo" , "ar%r" , "arrow_" , "arrow_" ] ,
4073
+ nilike_utf8,
4074
+ vec![ false , false , false , true , true , false , true ]
4075
+ ) ;
4076
+ test_utf8_scalar ! (
4077
+ nilike_utf8_scalar_escape_testing,
4078
+ vec![ "varchar(255)" , "int(255)" , "varchar" , "int" ] ,
4079
+ "%(%)%" ,
4080
+ nilike_utf8_scalar,
4081
+ vec![ false , false , true , true ]
4082
+ ) ;
4083
+ test_utf8_scalar ! (
4084
+ test_utf8_array_nilike_scalar,
4085
+ vec![ "arrow" , "parquet" , "datafusion" , "flight" ] ,
4086
+ "%AR%" ,
4087
+ nilike_utf8_scalar,
4088
+ vec![ false , false , true , true ]
4089
+ ) ;
4090
+
4091
+ test_utf8_scalar ! (
4092
+ test_utf8_array_nilike_scalar_start,
4093
+ vec![ "arrow" , "parrow" , "arrows" , "ARR" ] ,
4094
+ "aRRow%" ,
4095
+ nilike_utf8_scalar,
4096
+ vec![ false , true , false , true ]
4097
+ ) ;
4098
+
4099
+ test_utf8_scalar ! (
4100
+ test_utf8_array_nilike_scalar_end,
4101
+ vec![ "ArroW" , "parrow" , "ARRowS" , "arr" ] ,
4102
+ "%arrow" ,
4103
+ nilike_utf8_scalar,
4104
+ vec![ false , false , true , true ]
4105
+ ) ;
4106
+
4107
+ test_utf8_scalar ! (
4108
+ test_utf8_array_nilike_scalar_equals,
4109
+ vec![ "arrow" , "parrow" , "arrows" , "arr" ] ,
4110
+ "arrow" ,
4111
+ nilike_utf8_scalar,
4112
+ vec![ false , true , true , true ]
4113
+ ) ;
4114
+
4115
+ test_utf8_scalar ! (
4116
+ test_utf8_array_nilike_scalar_one,
4117
+ vec![ "arrow" , "arrows" , "parrow" , "arr" ] ,
4118
+ "arrow_" ,
4119
+ nilike_utf8_scalar,
4120
+ vec![ true , false , true , true ]
4121
+ ) ;
4122
+
3986
4123
test_utf8 ! (
3987
4124
test_utf8_array_neq,
3988
4125
vec![ "arrow" , "arrow" , "arrow" , "arrow" ] ,
0 commit comments