Skip to content

Commit 227d1f8

Browse files
improve null handling for to_char (#9689)
* improve null handling for to_char
* early return from to_char for null format
* remove invalid comment, update example
* rename column for consistency across platforms for tests
* return None instead of empty string from to_char
* use arrow::array::new_null_array for fast init
1 parent 67e0bd3 commit 227d1f8

File tree

3 files changed

+55
-10
lines changed

3 files changed

+55
-10
lines changed

datafusion-examples/examples/to_char.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,5 +193,24 @@ async fn main() -> Result<()> {
193193
&result
194194
);
195195

196+
// output format is null
197+
198+
let result = ctx
199+
.sql("SELECT to_char(arrow_cast(123456, 'Duration(Second)'), null) as result")
200+
.await?
201+
.collect()
202+
.await?;
203+
204+
assert_batches_eq!(
205+
&[
206+
"+--------+",
207+
"| result |",
208+
"+--------+",
209+
"| |",
210+
"+--------+",
211+
],
212+
&result
213+
);
214+
196215
Ok(())
197216
}

datafusion/functions/src/datetime/to_char.rs

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use std::any::Any;
1919
use std::sync::Arc;
2020

2121
use arrow::array::cast::AsArray;
22-
use arrow::array::{Array, ArrayRef, StringArray};
22+
use arrow::array::{new_null_array, Array, ArrayRef, StringArray};
2323
use arrow::datatypes::DataType;
2424
use arrow::datatypes::DataType::{
2525
Date32, Date64, Duration, Time32, Time64, Timestamp, Utf8,
@@ -109,7 +109,6 @@ impl ScalarUDFImpl for ToCharFunc {
109109
}
110110

111111
match &args[1] {
112-
// null format, use default formats
113112
ColumnarValue::Scalar(ScalarValue::Utf8(None))
114113
| ColumnarValue::Scalar(ScalarValue::Null) => {
115114
_to_char_scalar(args[0].clone(), None)
@@ -175,6 +174,18 @@ fn _to_char_scalar(
175174
let data_type = &expression.data_type();
176175
let is_scalar_expression = matches!(&expression, ColumnarValue::Scalar(_));
177176
let array = expression.into_array(1)?;
177+
178+
if format.is_none() {
179+
if is_scalar_expression {
180+
return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
181+
} else {
182+
return Ok(ColumnarValue::Array(new_null_array(
183+
&DataType::Utf8,
184+
array.len(),
185+
)));
186+
}
187+
}
188+
178189
let format_options = match _build_format_options(data_type, format) {
179190
Ok(value) => value,
180191
Err(value) => return value,
@@ -202,7 +213,7 @@ fn _to_char_scalar(
202213

203214
fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
204215
let arrays = ColumnarValue::values_to_arrays(args)?;
205-
let mut results: Vec<String> = vec![];
216+
let mut results: Vec<Option<String>> = vec![];
206217
let format_array = arrays[1].as_string::<i32>();
207218
let data_type = arrays[0].data_type();
208219

@@ -212,6 +223,10 @@ fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
212223
} else {
213224
Some(format_array.value(idx))
214225
};
226+
if format.is_none() {
227+
results.push(None);
228+
continue;
229+
}
215230
let format_options = match _build_format_options(data_type, format) {
216231
Ok(value) => value,
217232
Err(value) => return value,
@@ -221,7 +236,7 @@ fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
221236
let formatter = ArrayFormatter::try_new(arrays[0].as_ref(), &format_options)?;
222237
let result = formatter.value(idx).try_to_string();
223238
match result {
224-
Ok(value) => results.push(value),
239+
Ok(value) => results.push(Some(value)),
225240
Err(e) => return exec_err!("{}", e),
226241
}
227242
}
@@ -230,9 +245,12 @@ fn _to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
230245
ColumnarValue::Array(_) => Ok(ColumnarValue::Array(Arc::new(StringArray::from(
231246
results,
232247
)) as ArrayRef)),
233-
ColumnarValue::Scalar(_) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
234-
results.first().unwrap().to_string(),
235-
)))),
248+
ColumnarValue::Scalar(_) => match results.first().unwrap() {
249+
Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
250+
value.to_string(),
251+
)))),
252+
None => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
253+
},
236254
}
237255
}
238256

datafusion/sqllogictest/test_files/timestamps.slt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2661,7 +2661,7 @@ PT123456S
26612661
query T
26622662
select to_char(arrow_cast(123456, 'Duration(Second)'), null);
26632663
----
2664-
PT123456S
2664+
NULL
26652665

26662666
query error DataFusion error: Execution error: Cast error: Format error
26672667
SELECT to_char(timestamps, '%X%K') from formats;
@@ -2672,14 +2672,22 @@ SELECT to_char('2000-02-03'::date, '%X%K');
26722672
query T
26732673
SELECT to_char(timestamps, null) from formats;
26742674
----
2675-
2024-01-01T06:00:00Z
2676-
2025-01-01T23:59:58Z
2675+
NULL
2676+
NULL
26772677

26782678
query T
26792679
SELECT to_char(null, '%d-%m-%Y');
26802680
----
26812681
(empty)
26822682

2683+
query T
2684+
SELECT to_char(column1, column2)
2685+
FROM
2686+
(VALUES ('2024-01-01 06:00:00'::timestamp, null), ('2025-01-01 23:59:58'::timestamp, '%d:%m:%Y %H-%M-%S'));
2687+
----
2688+
NULL
2689+
01:01:2025 23-59-58
2690+
26832691
statement ok
26842692
drop table formats;
26852693

0 commit comments

Comments
 (0)