diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs index 84aa9feec654..ed9858106c52 100644 --- a/datafusion/functions/src/datetime/from_unixtime.rs +++ b/datafusion/functions/src/datetime/from_unixtime.rs @@ -15,16 +15,17 @@ // specific language governing permissions and limitations // under the License. -use arrow::datatypes::DataType; -use arrow::datatypes::DataType::{Int64, Timestamp}; -use arrow::datatypes::TimeUnit::Second; use std::any::Any; -use std::sync::OnceLock; +use std::sync::{Arc, OnceLock}; -use datafusion_common::{exec_err, Result}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::{Int64, Timestamp, Utf8}; +use arrow::datatypes::TimeUnit::Second; +use datafusion_common::{exec_err, internal_err, ExprSchema, Result, ScalarValue}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; #[derive(Debug)] @@ -41,7 +42,10 @@ impl Default for FromUnixtimeFunc { impl FromUnixtimeFunc { pub fn new() -> Self { Self { - signature: Signature::uniform(1, vec![Int64], Volatility::Immutable), + signature: Signature::one_of( + vec![Exact(vec![Int64, Utf8]), Exact(vec![Int64])], + Volatility::Immutable, + ), } } } @@ -59,28 +63,63 @@ impl ScalarUDFImpl for FromUnixtimeFunc { &self.signature } + fn return_type_from_exprs( + &self, + args: &[Expr], + _schema: &dyn ExprSchema, + arg_types: &[DataType], + ) -> Result { + match arg_types.len() { + 1 => Ok(Timestamp(Second, None)), + 2 => match &args[1] { + Expr::Literal(ScalarValue::Utf8(Some(tz))) => Ok(Timestamp(Second, Some(Arc::from(tz.to_string())))), + _ => exec_err!( + "Second argument for `from_unixtime` must be non-null utf8, received {:?}", + arg_types[1]), + }, + _ => exec_err!( + "from_unixtime function requires 1 or 2 arguments, got {}", + arg_types.len() + ), + } + } + fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(Timestamp(Second, None)) + internal_err!("call return_type_from_exprs instead") } fn invoke(&self, args: &[ColumnarValue]) -> Result { - if args.len() != 1 { + let len = args.len(); + if len != 1 && len != 2 { return exec_err!( - "from_unixtime function requires 1 argument, got {}", + "from_unixtime function requires 1 or 2 argument, got {}", args.len() ); } - match args[0].data_type() { - Int64 => args[0].cast_to(&Timestamp(Second, None), None), - other => { - exec_err!( - "Unsupported data type {:?} for function from_unixtime", - other - ) - } + if args[0].data_type() != Int64 { + return exec_err!( + "Unsupported data type {:?} for function from_unixtime", + args[0].data_type() + ); + } + + match len { + 1 => args[0].cast_to(&Timestamp(Second, None), None), + 2 => match &args[1] { + ColumnarValue::Scalar(ScalarValue::Utf8(Some(tz))) => args[0] + .cast_to(&Timestamp(Second, Some(Arc::from(tz.to_string()))), None), + _ => { + exec_err!( + "Unsupported data type {:?} for function from_unixtime", + args[1].data_type() + ) + } + }, + _ => unreachable!(), } } + fn documentation(&self) -> Option<&Documentation> { Some(get_from_unixtime_doc()) } @@ -93,12 +132,63 @@ fn get_from_unixtime_doc() -> &'static Documentation { Documentation::builder() .with_doc_section(DOC_SECTION_DATETIME) .with_description("Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp.") - .with_syntax_example("from_unixtime(expression)") + .with_syntax_example("from_unixtime(expression[, timezone])") + .with_standard_argument("expression", None) .with_argument( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + "timezone", + "Optional timezone to use when converting the integer to a timestamp. If not provided, the default timezone is UTC.", ) + .with_sql_example(r#"```sql +> select from_unixtime(1599572549, 'America/New_York'); ++-----------------------------------------------------------+ +| from_unixtime(Int64(1599572549),Utf8("America/New_York")) | ++-----------------------------------------------------------+ +| 2020-09-08T09:42:29-04:00 | ++-----------------------------------------------------------+ +```"#) .build() .unwrap() }) } + +#[cfg(test)] +mod test { + use crate::datetime::from_unixtime::FromUnixtimeFunc; + use datafusion_common::ScalarValue; + use datafusion_common::ScalarValue::Int64; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + #[test] + fn test_without_timezone() { + let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))]; + + let result = FromUnixtimeFunc::new().invoke(&args).unwrap(); + + match result { + ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), None)) => { + assert_eq!(sec, 1729900800); + } + _ => panic!("Expected scalar value"), + } + } + + #[test] + fn test_with_timezone() { + let args = [ + ColumnarValue::Scalar(Int64(Some(1729900800))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some( + "America/New_York".to_string(), + ))), + ]; + + let result = FromUnixtimeFunc::new().invoke(&args).unwrap(); + + match result { + ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), Some(tz))) => { + assert_eq!(sec, 1729900800); + assert_eq!(tz.to_string(), "America/New_York"); + } + _ => panic!("Expected scalar value"), + } + } +} diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index a09a63a791fc..42abeff674c5 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -308,6 +308,29 @@ SELECT from_unixtime(ts / 1000000000) FROM ts_data LIMIT 3; 2020-09-08T12:42:29 2020-09-08T11:42:29 +# from_unixtime single + +query P +SELECT from_unixtime(1599572549190855123 / 1000000000, 'America/New_York'); +---- +2020-09-08T09:42:29-04:00 + +# from_unixtime with timezone +query P +SELECT from_unixtime(ts / 1000000000, 'Asia/Istanbul') FROM ts_data LIMIT 3; +---- +2020-09-08T16:42:29+03:00 +2020-09-08T15:42:29+03:00 +2020-09-08T14:42:29+03:00 + +# from_unixtime with utc timezone +query P +SELECT from_unixtime(ts / 1000000000, 'UTC') FROM ts_data LIMIT 3; +---- +2020-09-08T13:42:29Z +2020-09-08T12:42:29Z +2020-09-08T11:42:29Z + # to_timestamp query I diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 98c44cbd981d..b92b815d7c95 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2037,12 +2037,24 @@ _Alias of [date_trunc](#date_trunc)._ Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp. ``` -from_unixtime(expression) +from_unixtime(expression[, timezone]) ``` #### Arguments -- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **timezone**: Optional timezone to use when converting the integer to a timestamp. If not provided, the default timezone is UTC. + +#### Example + +```sql +> select from_unixtime(1599572549, 'America/New_York'); ++-----------------------------------------------------------+ +| from_unixtime(Int64(1599572549),Utf8("America/New_York")) | ++-----------------------------------------------------------+ +| 2020-09-08T09:42:29-04:00 | ++-----------------------------------------------------------+ +``` ### `make_date`