Skip to content

Commit e18ec17

Browse files
committed
feat(substrait): use IntervalCompound instead of interval-month-day-nano UDT
1 parent 146f16a commit e18ec17

File tree

4 files changed

+77
-118
lines changed

4 files changed

+77
-118
lines changed

datafusion/substrait/src/logical_plan/consumer.rs

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,18 @@ use crate::variation_const::{
4242
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
4343
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
4444
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
45-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
46-
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
45+
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
46+
VIEW_CONTAINER_TYPE_VARIATION_REF,
4747
};
4848
#[allow(deprecated)]
4949
use crate::variation_const::{
50-
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF,
51-
INTERVAL_YEAR_MONTH_TYPE_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
52-
TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF,
53-
TIMESTAMP_SECOND_TYPE_VARIATION_REF,
50+
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
51+
INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
52+
TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
53+
TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
5454
};
5555
use datafusion::arrow::array::{new_empty_array, AsArray};
56+
use datafusion::arrow::temporal_conversions::NANOSECONDS;
5657
use datafusion::common::scalar::ScalarStructBuilder;
5758
use datafusion::dataframe::DataFrame;
5859
use datafusion::logical_expr::expr::InList;
@@ -71,10 +72,10 @@ use datafusion::{
7172
use std::collections::HashSet;
7273
use std::sync::Arc;
7374
use substrait::proto::exchange_rel::ExchangeKind;
74-
use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
7575
use substrait::proto::expression::literal::user_defined::Val;
7676
use substrait::proto::expression::literal::{
77-
IntervalDayToSecond, IntervalYearToMonth, UserDefined,
77+
interval_day_to_second, IntervalCompound, IntervalDayToSecond, IntervalYearToMonth,
78+
UserDefined,
7879
};
7980
use substrait::proto::expression::subquery::SubqueryType;
8081
use substrait::proto::expression::{self, FieldReference, Literal, ScalarFunction};
@@ -1831,8 +1832,13 @@ fn from_substrait_type(
18311832
Ok(DataType::Interval(IntervalUnit::YearMonth))
18321833
}
18331834
r#type::Kind::IntervalDay(_) => Ok(DataType::Interval(IntervalUnit::DayTime)),
1835+
r#type::Kind::IntervalCompound(_) => {
1836+
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
1837+
}
18341838
r#type::Kind::UserDefined(u) => {
1839+
// Kept for backwards compatibility, use IntervalCompound instead
18351840
if let Some(name) = extensions.types.get(&u.type_reference) {
1841+
#[allow(deprecated)]
18361842
match name.as_ref() {
18371843
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
18381844
_ => not_impl_err!(
@@ -1842,7 +1848,7 @@ fn from_substrait_type(
18421848
),
18431849
}
18441850
} else {
1845-
// Kept for backwards compatibility, new plans should include the extension instead
1851+
// Kept for backwards compatibility, use IntervalCompound instead
18461852
#[allow(deprecated)]
18471853
match u.type_reference {
18481854
// Kept for backwards compatibility, use IntervalYear instead
@@ -2275,6 +2281,7 @@ fn from_substrait_literal(
22752281
subseconds,
22762282
precision_mode,
22772283
})) => {
2284+
use interval_day_to_second::PrecisionMode;
22782285
// DF only supports millisecond precision, so for any more granular type we lose precision
22792286
let milliseconds = match precision_mode {
22802287
Some(PrecisionMode::Microseconds(ms)) => ms / 1000,
@@ -2299,6 +2306,35 @@ fn from_substrait_literal(
22992306
Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, months })) => {
23002307
ScalarValue::new_interval_ym(*years, *months)
23012308
}
2309+
Some(LiteralType::IntervalCompound(IntervalCompound {
2310+
interval_year_to_month,
2311+
interval_day_to_second,
2312+
})) => match (interval_year_to_month, interval_day_to_second) {
2313+
(
2314+
Some(IntervalYearToMonth { years, months }),
2315+
Some(IntervalDayToSecond {
2316+
days,
2317+
seconds,
2318+
subseconds,
2319+
precision_mode:
2320+
Some(interval_day_to_second::PrecisionMode::Precision(p)),
2321+
}),
2322+
) => {
2323+
if *p < 0 || *p > 9 {
2324+
return plan_err!(
2325+
"Unsupported Substrait interval day to second precision: {}",
2326+
p
2327+
);
2328+
}
2329+
let nanos = *subseconds * i64::pow(10, (*p - 9) as u32);
2330+
ScalarValue::new_interval_mdn(
2331+
*years * 12 + months,
2332+
*days,
2333+
*seconds as i64 * NANOSECONDS + nanos,
2334+
)
2335+
}
2336+
_ => return plan_err!("Substrait compound interval missing components"),
2337+
},
23022338
Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())),
23032339
Some(LiteralType::UserDefined(user_defined)) => {
23042340
// Helper function to prevent duplicating this code - can be inlined once the non-extension path is removed
@@ -2329,6 +2365,8 @@ fn from_substrait_literal(
23292365

23302366
if let Some(name) = extensions.types.get(&user_defined.type_reference) {
23312367
match name.as_ref() {
2368+
// Kept for backwards compatibility - new plans should use IntervalCompound instead
2369+
#[allow(deprecated)]
23322370
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => {
23332371
interval_month_day_nano(user_defined)?
23342372
}
@@ -2379,6 +2417,7 @@ fn from_substrait_literal(
23792417
milliseconds,
23802418
}))
23812419
}
2420+
// Kept for backwards compatibility, use IntervalCompound instead
23822421
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
23832422
interval_month_day_nano(user_defined)?
23842423
}

datafusion/substrait/src/logical_plan/producer.rs

Lines changed: 24 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ use crate::variation_const::{
4343
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
4444
};
4545
use datafusion::arrow::array::{Array, GenericListArray, OffsetSizeTrait};
46+
use datafusion::arrow::temporal_conversions::NANOSECONDS;
4647
use datafusion::common::{
4748
exec_err, internal_err, not_impl_err, plan_err, substrait_datafusion_err,
4849
substrait_err, DFSchemaRef, ToDFSchema,
@@ -58,8 +59,8 @@ use substrait::proto::exchange_rel::{ExchangeKind, RoundRobin, ScatterFields};
5859
use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
5960
use substrait::proto::expression::literal::map::KeyValue;
6061
use substrait::proto::expression::literal::{
61-
user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Map,
62-
PrecisionTimestamp, Struct, UserDefined,
62+
IntervalCompound, IntervalDayToSecond, IntervalYearToMonth, List, Map,
63+
PrecisionTimestamp, Struct,
6364
};
6465
use substrait::proto::expression::subquery::InPredicate;
6566
use substrait::proto::expression::window_function::BoundsType;
@@ -1489,16 +1490,14 @@ fn to_substrait_type(
14891490
})),
14901491
}),
14911492
IntervalUnit::MonthDayNano => {
1492-
// Substrait doesn't currently support this type, so we represent it as a UDT
14931493
Ok(substrait::proto::Type {
1494-
kind: Some(r#type::Kind::UserDefined(r#type::UserDefined {
1495-
type_reference: extensions.register_type(
1496-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
1497-
),
1498-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1499-
nullability,
1500-
type_parameters: vec![],
1501-
})),
1494+
kind: Some(r#type::Kind::IntervalCompound(
1495+
r#type::IntervalCompound {
1496+
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1497+
nullability,
1498+
precision: 9, // nanos
1499+
},
1500+
)),
15021501
})
15031502
}
15041503
}
@@ -1892,23 +1891,21 @@ fn to_substrait_literal(
18921891
}),
18931892
DEFAULT_TYPE_VARIATION_REF,
18941893
),
1895-
ScalarValue::IntervalMonthDayNano(Some(i)) => {
1896-
// IntervalMonthDayNano is internally represented as a 128-bit integer, containing
1897-
// months (32bit), days (32bit), and nanoseconds (64bit)
1898-
let bytes = i.to_byte_slice();
1899-
(
1900-
LiteralType::UserDefined(UserDefined {
1901-
type_reference: extensions
1902-
.register_type(INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()),
1903-
type_parameters: vec![],
1904-
val: Some(user_defined::Val::Value(ProtoAny {
1905-
type_url: INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
1906-
value: bytes.to_vec().into(),
1907-
})),
1894+
ScalarValue::IntervalMonthDayNano(Some(i)) => (
1895+
LiteralType::IntervalCompound(IntervalCompound {
1896+
interval_year_to_month: Some(IntervalYearToMonth {
1897+
years: i.months / 12,
1898+
months: i.months % 12,
19081899
}),
1909-
DEFAULT_TYPE_VARIATION_REF,
1910-
)
1911-
}
1900+
interval_day_to_second: Some(IntervalDayToSecond {
1901+
days: i.days,
1902+
seconds: (i.nanoseconds / NANOSECONDS) as i32,
1903+
subseconds: i.nanoseconds % NANOSECONDS,
1904+
precision_mode: Some(PrecisionMode::Precision(9)), // nanoseconds
1905+
}),
1906+
}),
1907+
DEFAULT_TYPE_VARIATION_REF,
1908+
),
19121909
ScalarValue::IntervalDayTime(Some(i)) => (
19131910
LiteralType::IntervalDayToSecond(IntervalDayToSecond {
19141911
days: i.days,
@@ -2310,39 +2307,6 @@ mod test {
23102307
Ok(())
23112308
}
23122309

2313-
#[test]
2314-
fn custom_type_literal_extensions() -> Result<()> {
2315-
let mut extensions = Extensions::default();
2316-
// IntervalMonthDayNano is represented as a custom type in Substrait
2317-
let scalar = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::new(
2318-
17, 25, 1234567890,
2319-
)));
2320-
let substrait_literal = to_substrait_literal(&scalar, &mut extensions)?;
2321-
let roundtrip_scalar =
2322-
from_substrait_literal_without_names(&substrait_literal, &extensions)?;
2323-
assert_eq!(scalar, roundtrip_scalar);
2324-
2325-
assert_eq!(
2326-
extensions,
2327-
Extensions {
2328-
functions: HashMap::new(),
2329-
types: HashMap::from([(
2330-
0,
2331-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()
2332-
)]),
2333-
type_variations: HashMap::new(),
2334-
}
2335-
);
2336-
2337-
// Check we fail if we don't propagate extensions
2338-
assert!(from_substrait_literal_without_names(
2339-
&substrait_literal,
2340-
&Extensions::default()
2341-
)
2342-
.is_err());
2343-
Ok(())
2344-
}
2345-
23462310
#[test]
23472311
fn round_trip_types() -> Result<()> {
23482312
round_trip_type(DataType::Boolean)?;
@@ -2424,37 +2388,6 @@ mod test {
24242388
Ok(())
24252389
}
24262390

2427-
#[test]
2428-
fn custom_type_extensions() -> Result<()> {
2429-
let mut extensions = Extensions::default();
2430-
// IntervalMonthDayNano is represented as a custom type in Substrait
2431-
let dt = DataType::Interval(IntervalUnit::MonthDayNano);
2432-
2433-
let substrait = to_substrait_type(&dt, true, &mut extensions)?;
2434-
let roundtrip_dt = from_substrait_type_without_names(&substrait, &extensions)?;
2435-
assert_eq!(dt, roundtrip_dt);
2436-
2437-
assert_eq!(
2438-
extensions,
2439-
Extensions {
2440-
functions: HashMap::new(),
2441-
types: HashMap::from([(
2442-
0,
2443-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()
2444-
)]),
2445-
type_variations: HashMap::new(),
2446-
}
2447-
);
2448-
2449-
// Check we fail if we don't propagate extensions
2450-
assert!(
2451-
from_substrait_type_without_names(&substrait, &Extensions::default())
2452-
.is_err()
2453-
);
2454-
2455-
Ok(())
2456-
}
2457-
24582391
#[test]
24592392
fn named_struct_names() -> Result<()> {
24602393
let mut extensions = Extensions::default();

datafusion/substrait/src/variation_const.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,16 @@ pub const INTERVAL_DAY_TIME_TYPE_REF: u32 = 2;
9696
/// [`ScalarValue::IntervalMonthDayNano`]: datafusion::common::ScalarValue::IntervalMonthDayNano
9797
#[deprecated(
9898
since = "41.0.0",
99-
note = "Use Substrait `UserDefinedType` with name `INTERVAL_MONTH_DAY_NANO_TYPE_NAME` instead"
99+
note = "Use Substrait `IntervalCompund` type instead"
100100
)]
101101
pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3;
102102

103103
/// For [`DataType::Interval`] with [`IntervalUnit::MonthDayNano`].
104104
///
105105
/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
106106
/// [`IntervalUnit::MonthDayNano`]: datafusion::arrow::datatypes::IntervalUnit::MonthDayNano
107+
#[deprecated(
108+
since = "42.1.0",
109+
note = "Use Substrait `IntervalCompund` type instead"
110+
)]
107111
pub const INTERVAL_MONTH_DAY_NANO_TYPE_NAME: &str = "interval-month-day-nano";

datafusion/substrait/tests/cases/roundtrip_logical_plan.rs

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -230,23 +230,6 @@ async fn select_with_reused_functions() -> Result<()> {
230230
Ok(())
231231
}
232232

233-
#[tokio::test]
234-
async fn roundtrip_udt_extensions() -> Result<()> {
235-
let ctx = create_context().await?;
236-
let proto =
237-
roundtrip_with_ctx("SELECT INTERVAL '1 YEAR 1 DAY 1 SECOND' FROM data", ctx)
238-
.await?;
239-
let expected_type = SimpleExtensionDeclaration {
240-
mapping_type: Some(MappingType::ExtensionType(ExtensionType {
241-
extension_uri_reference: u32::MAX,
242-
type_anchor: 0,
243-
name: "interval-month-day-nano".to_string(),
244-
})),
245-
};
246-
assert_eq!(proto.extensions, vec![expected_type]);
247-
Ok(())
248-
}
249-
250233
#[tokio::test]
251234
async fn select_with_filter_date() -> Result<()> {
252235
roundtrip("SELECT * FROM data WHERE c > CAST('2020-01-01' AS DATE)").await

0 commit comments

Comments
 (0)