You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Implemented simplify for the starts_with function to convert it into a LIKE expression. (#14119)
* Implemented `simplify` for the `starts_with` function to convert it into a LIKE expression, enabling predicate pruning optimization.
* fix: escape special characters in starts_with to LIKE conversion
* Updated the `simplify` function to handle the Utf8, LargeUtf8, and Utf8View data types, and updated the corresponding test
* Add some more tests
* Add pruning test
---------
Co-authored-by: Andrew Lamb <[email protected]>
04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/foo.parquet]]}, projection=[column1], predicate=column1@0 LIKE f%, pruning_predicate=column1_null_count@2 != column1_row_count@3 AND column1_min@0 <= g AND f <= column1_max@1, required_guarantees=[]
Copy file name to clipboardExpand all lines: datafusion/sqllogictest/test_files/string/string_view.slt
+44-2Lines changed: 44 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -344,9 +344,51 @@ EXPLAIN SELECT
344
344
FROM test;
345
345
----
346
346
logical_plan
347
-
01)Projection: starts_with(test.column1_utf8view, Utf8View("äöüß")) AS c1, starts_with(test.column1_utf8view, Utf8View("")) AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
347
+
01)Projection: test.column1_utf8view LIKE Utf8View("äöüß%") AS c1, CASE test.column1_utf8view IS NOT NULL WHEN Boolean(true) THEN Boolean(true) END AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
348
348
02)--TableScan: test projection=[column1_utf8view]
349
349
350
+
## Test STARTS_WITH is rewritten to LIKE when the pattern is a constant
351
+
query TT
352
+
EXPLAIN SELECT
353
+
STARTS_WITH(column1_utf8, 'foo%') as c1,
354
+
STARTS_WITH(column1_large_utf8, 'foo%') as c2,
355
+
STARTS_WITH(column1_utf8view, 'foo%') as c3,
356
+
STARTS_WITH(column1_utf8, 'f_o') as c4,
357
+
STARTS_WITH(column1_large_utf8, 'f_o') as c5,
358
+
STARTS_WITH(column1_utf8view, 'f_o') as c6
359
+
FROM test;
360
+
----
361
+
logical_plan
362
+
01)Projection: test.column1_utf8 LIKE Utf8("foo\%%") AS c1, test.column1_large_utf8 LIKE LargeUtf8("foo\%%") AS c2, test.column1_utf8view LIKE Utf8View("foo\%%") AS c3, test.column1_utf8 LIKE Utf8("f_o%") AS c4, test.column1_large_utf8 LIKE LargeUtf8("f_o%") AS c5, test.column1_utf8view LIKE Utf8View("f_o%") AS c6
363
+
02)--TableScan: test projection=[column1_utf8, column1_large_utf8, column1_utf8view]
364
+
365
+
## Test STARTS_WITH works with column arguments
366
+
query TT
367
+
EXPLAIN SELECT
368
+
STARTS_WITH(column1_utf8, substr(column1_utf8, 1, 2)) as c1,
369
+
STARTS_WITH(column1_large_utf8, substr(column1_large_utf8, 1, 2)) as c2,
370
+
STARTS_WITH(column1_utf8view, substr(column1_utf8view, 1, 2)) as c3
371
+
FROM test;
372
+
----
373
+
logical_plan
374
+
01)Projection: starts_with(test.column1_utf8, substr(test.column1_utf8, Int64(1), Int64(2))) AS c1, starts_with(test.column1_large_utf8, substr(test.column1_large_utf8, Int64(1), Int64(2))) AS c2, starts_with(test.column1_utf8view, substr(test.column1_utf8view, Int64(1), Int64(2))) AS c3
375
+
02)--TableScan: test projection=[column1_utf8, column1_large_utf8, column1_utf8view]
376
+
377
+
query BBB
378
+
SELECT
379
+
STARTS_WITH(column1_utf8, substr(column1_utf8, 1, 2)) as c1,
380
+
STARTS_WITH(column1_large_utf8, substr(column1_large_utf8, 1, 2)) as c2,
381
+
STARTS_WITH(column1_utf8view, substr(column1_utf8view, 1, 2)) as c3
382
+
FROM test;
383
+
----
384
+
true true true
385
+
true true true
386
+
true true true
387
+
true true true
388
+
NULL NULL NULL
389
+
390
+
391
+
# Ensure that INITCAP works with utf8view
350
392
query TT
351
393
EXPLAIN SELECT
352
394
INITCAP(column1_utf8view) as c
@@ -887,7 +929,7 @@ EXPLAIN SELECT
887
929
FROM test;
888
930
----
889
931
logical_plan
890
-
01)Projection: starts_with(test.column1_utf8view, Utf8View("foo")) AS c, starts_with(test.column1_utf8view, test.column2_utf8view) AS c2
932
+
01)Projection: test.column1_utf8view LIKE Utf8View("foo%") AS c, starts_with(test.column1_utf8view, test.column2_utf8view) AS c2
891
933
02)--TableScan: test projection=[column1_utf8view, column2_utf8view]
0 commit comments