Skip to content

Commit 5fa8b3b

Browse files
authored
fix: encode should work with non-UTF-8 binaries (#14087)
* fix: encode function should work with strings and binary (closes #14055)
* chore: address comments, add test
1 parent 50c7977 commit 5fa8b3b

File tree

2 files changed

+45
-7
lines changed

2 files changed

+45
-7
lines changed

datafusion/functions/src/encoding/inner.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,21 @@ impl ScalarUDFImpl for EncodeFunc {
8585
}
8686

8787
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
88-
Ok(arg_types[0].to_owned())
88+
use DataType::*;
89+
90+
Ok(match arg_types[0] {
91+
Utf8 => Utf8,
92+
LargeUtf8 => LargeUtf8,
93+
Utf8View => Utf8,
94+
Binary => Utf8,
95+
LargeBinary => LargeUtf8,
96+
Null => Null,
97+
_ => {
98+
return plan_err!(
99+
"The encode function can only accept Utf8 or Binary or Null."
100+
);
101+
}
102+
})
89103
}
90104

91105
fn invoke_batch(
@@ -110,12 +124,12 @@ impl ScalarUDFImpl for EncodeFunc {
110124
}
111125

112126
match arg_types[0] {
113-
DataType::Utf8 | DataType::Utf8View | DataType::Binary | DataType::Null => {
127+
DataType::Utf8 | DataType::Utf8View | DataType::Null => {
114128
Ok(vec![DataType::Utf8; 2])
115129
}
116-
DataType::LargeUtf8 | DataType::LargeBinary => {
117-
Ok(vec![DataType::LargeUtf8, DataType::Utf8])
118-
}
130+
DataType::LargeUtf8 => Ok(vec![DataType::LargeUtf8, DataType::Utf8]),
131+
DataType::Binary => Ok(vec![DataType::Binary, DataType::Utf8]),
132+
DataType::LargeBinary => Ok(vec![DataType::LargeBinary, DataType::Utf8]),
119133
_ => plan_err!(
120134
"1st argument should be Utf8 or Binary or Null, got {:?}",
121135
arg_types[0]

datafusion/sqllogictest/test_files/encoding.slt

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ CREATE TABLE test(
2323
hex_field TEXT
2424
) as VALUES
2525
(0, 'abc', encode('abc', 'base64'), encode('abc', 'hex')),
26-
(1, 'qweqwe', encode('qweqwe', 'base64'), encode('qweqwe', 'hex')),
27-
(2, NULL, NULL, NULL)
26+
(1, 'qweqwe', encode('qweqwe', 'base64'), encode('qweqwe', 'hex')),
27+
(2, NULL, NULL, NULL),
28+
(3, X'8f50d3f60eae370ddbf85c86219c55108a350165', encode('8f50d3f60eae370ddbf85c86219c55108a350165', 'base64'), encode('8f50d3f60eae370ddbf85c86219c55108a350165', 'hex'))
2829
;
2930

3031
# errors
@@ -43,34 +44,51 @@ select decode(hex_field, 'non_encoding') from test;
4344
query error
4445
select to_hex(hex_field) from test;
4546

47+
query error
48+
select arrow_cast(decode(X'8f50d3f60eae370ddbf85c86219c55108a350165', 'base64'), 'Utf8');
49+
4650
# Arrays tests
4751
query T
4852
SELECT encode(bin_field, 'hex') FROM test ORDER BY num;
4953
----
5054
616263
5155
717765717765
5256
NULL
57+
8f50d3f60eae370ddbf85c86219c55108a350165
5358

5459
query T
5560
SELECT arrow_cast(decode(base64_field, 'base64'), 'Utf8') FROM test ORDER BY num;
5661
----
5762
abc
5863
qweqwe
5964
NULL
65+
8f50d3f60eae370ddbf85c86219c55108a350165
6066

6167
query T
6268
SELECT arrow_cast(decode(hex_field, 'hex'), 'Utf8') FROM test ORDER BY num;
6369
----
6470
abc
6571
qweqwe
6672
NULL
73+
8f50d3f60eae370ddbf85c86219c55108a350165
6774

6875
query T
6976
select to_hex(num) from test ORDER BY num;
7077
----
7178
0
7279
1
7380
2
81+
3
82+
83+
query T
84+
select encode(bin_field, 'base64') FROM test WHERE num = 3;
85+
----
86+
j1DT9g6uNw3b+FyGIZxVEIo1AWU
87+
88+
query B
89+
select decode(encode(bin_field, 'base64'), 'base64') = X'8f50d3f60eae370ddbf85c86219c55108a350165' FROM test WHERE num = 3;
90+
----
91+
true
7492

7593
# test for Utf8View support for encode
7694
statement ok
@@ -102,3 +120,9 @@ Andrew QW5kcmV3 416e64726577 X WA 58
102120
Xiangpeng WGlhbmdwZW5n 5869616e6770656e67 Xiangpeng WGlhbmdwZW5n 5869616e6770656e67
103121
Raphael UmFwaGFlbA 5261706861656c R Ug 52
104122
NULL NULL NULL R Ug 52
123+
124+
# test for hex digest
125+
query T
126+
select encode(digest('hello', 'sha256'), 'hex');
127+
----
128+
2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824

0 commit comments

Comments
 (0)