Skip to content

Commit b25ba57

Browse files
committed
Reduce size of Expr
1 parent 2797cf7 commit b25ba57

File tree

6 files changed

+106
-80
lines changed

6 files changed

+106
-80
lines changed

datafusion/expr/src/expr.rs

Lines changed: 47 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ pub enum Expr {
297297
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
298298
AggregateFunction(AggregateFunction),
299299
/// Represents the call of a window function with arguments.
300-
WindowFunction(WindowFunction),
300+
WindowFunction(Box<WindowFunction>), // Boxed as it is large (272 bytes)
301301
/// Returns whether the list contains the expr value.
302302
InList(InList),
303303
/// EXISTS subquery
@@ -341,6 +341,13 @@ impl From<Column> for Expr {
341341
}
342342
}
343343

344+
/// Create an [`Expr`] from a [`WindowFunction`]
345+
impl From<WindowFunction> for Expr {
346+
fn from(value: WindowFunction) -> Self {
347+
Expr::WindowFunction(Box::new(value))
348+
}
349+
}
350+
344351
/// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is
345352
/// useful for creating [`Expr`] from a [`DFSchema`].
346353
///
@@ -1893,24 +1900,24 @@ impl NormalizeEq for Expr {
18931900
_ => false,
18941901
}
18951902
}
1896-
(
1897-
Expr::WindowFunction(WindowFunction {
1903+
(Expr::WindowFunction(left), Expr::WindowFunction(right)) => {
1904+
let WindowFunction {
18981905
fun: self_fun,
18991906
args: self_args,
19001907
partition_by: self_partition_by,
19011908
order_by: self_order_by,
19021909
window_frame: self_window_frame,
19031910
null_treatment: self_null_treatment,
1904-
}),
1905-
Expr::WindowFunction(WindowFunction {
1911+
} = left.as_ref();
1912+
let WindowFunction {
19061913
fun: other_fun,
19071914
args: other_args,
19081915
partition_by: other_partition_by,
19091916
order_by: other_order_by,
19101917
window_frame: other_window_frame,
19111918
null_treatment: other_null_treatment,
1912-
}),
1913-
) => {
1919+
} = right.as_ref();
1920+
19141921
self_fun.name() == other_fun.name()
19151922
&& self_window_frame == other_window_frame
19161923
&& self_null_treatment == other_null_treatment
@@ -2150,14 +2157,15 @@ impl HashNode for Expr {
21502157
distinct.hash(state);
21512158
null_treatment.hash(state);
21522159
}
2153-
Expr::WindowFunction(WindowFunction {
2154-
fun,
2155-
args: _args,
2156-
partition_by: _partition_by,
2157-
order_by: _order_by,
2158-
window_frame,
2159-
null_treatment,
2160-
}) => {
2160+
Expr::WindowFunction(window_func) => {
2161+
let WindowFunction {
2162+
fun,
2163+
args: _args,
2164+
partition_by: _partition_by,
2165+
order_by: _order_by,
2166+
window_frame,
2167+
null_treatment,
2168+
} = window_func.as_ref();
21612169
fun.hash(state);
21622170
window_frame.hash(state);
21632171
null_treatment.hash(state);
@@ -2458,14 +2466,15 @@ impl Display for SchemaDisplay<'_> {
24582466

24592467
Ok(())
24602468
}
2461-
Expr::WindowFunction(WindowFunction {
2462-
fun,
2463-
args,
2464-
partition_by,
2465-
order_by,
2466-
window_frame,
2467-
null_treatment,
2468-
}) => {
2469+
Expr::WindowFunction(window_func) => {
2470+
let WindowFunction {
2471+
fun,
2472+
args,
2473+
partition_by,
2474+
order_by,
2475+
window_frame,
2476+
null_treatment,
2477+
} = window_func.as_ref();
24692478
write!(
24702479
f,
24712480
"{}({})",
@@ -2612,14 +2621,16 @@ impl Display for Expr {
26122621
// Expr::ScalarFunction(ScalarFunction { func, args }) => {
26132622
// write!(f, "{}", func.display_name(args).unwrap())
26142623
// }
2615-
Expr::WindowFunction(WindowFunction {
2616-
fun,
2617-
args,
2618-
partition_by,
2619-
order_by,
2620-
window_frame,
2621-
null_treatment,
2622-
}) => {
2624+
Expr::WindowFunction(window_func) => {
2625+
let WindowFunction {
2626+
fun,
2627+
args,
2628+
partition_by,
2629+
order_by,
2630+
window_frame,
2631+
null_treatment,
2632+
} = window_func.as_ref();
2633+
26232634
fmt_function(f, &fun.to_string(), false, args, true)?;
26242635

26252636
if let Some(nt) = null_treatment {
@@ -3076,6 +3087,10 @@ mod test {
30763087
// If this test fails when you change `Expr`, please try
30773088
// `Box`ing the fields to make `Expr` smaller
30783089
// See https://github.com/apache/datafusion/issues/14256 for details
3079-
assert_eq!(size_of::<Expr>(), 272);
3090+
assert_eq!(size_of::<Expr>(), 112);
3091+
assert_eq!(size_of::<ScalarValue>(), 64);
3092+
assert_eq!(size_of::<DataType>(), 24); // 3 ptrs
3093+
assert_eq!(size_of::<Vec<Expr>>(), 24);
3094+
assert_eq!(size_of::<Arc<Expr>>(), 8);
30803095
}
30813096
}

datafusion/expr/src/expr_fn.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -843,7 +843,7 @@ impl ExprFuncBuilder {
843843
udwf.window_frame =
844844
window_frame.unwrap_or(WindowFrame::new(has_order_by));
845845
udwf.null_treatment = null_treatment;
846-
Expr::WindowFunction(udwf)
846+
Expr::from(udwf)
847847
}
848848
};
849849

@@ -897,7 +897,7 @@ impl ExprFunctionExt for Expr {
897897
ExprFuncBuilder::new(Some(ExprFuncKind::Aggregate(udaf)))
898898
}
899899
Expr::WindowFunction(udwf) => {
900-
ExprFuncBuilder::new(Some(ExprFuncKind::Window(udwf)))
900+
ExprFuncBuilder::new(Some(ExprFuncKind::Window(*udwf)))
901901
}
902902
_ => ExprFuncBuilder::new(None),
903903
};
@@ -937,7 +937,7 @@ impl ExprFunctionExt for Expr {
937937
ExprFuncBuilder::new(Some(ExprFuncKind::Aggregate(udaf)))
938938
}
939939
Expr::WindowFunction(udwf) => {
940-
ExprFuncBuilder::new(Some(ExprFuncKind::Window(udwf)))
940+
ExprFuncBuilder::new(Some(ExprFuncKind::Window(*udwf)))
941941
}
942942
_ => ExprFuncBuilder::new(None),
943943
};
@@ -950,7 +950,7 @@ impl ExprFunctionExt for Expr {
950950
fn partition_by(self, partition_by: Vec<Expr>) -> ExprFuncBuilder {
951951
match self {
952952
Expr::WindowFunction(udwf) => {
953-
let mut builder = ExprFuncBuilder::new(Some(ExprFuncKind::Window(udwf)));
953+
let mut builder = ExprFuncBuilder::new(Some(ExprFuncKind::Window(*udwf)));
954954
builder.partition_by = Some(partition_by);
955955
builder
956956
}
@@ -961,7 +961,7 @@ impl ExprFunctionExt for Expr {
961961
fn window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder {
962962
match self {
963963
Expr::WindowFunction(udwf) => {
964-
let mut builder = ExprFuncBuilder::new(Some(ExprFuncKind::Window(udwf)));
964+
let mut builder = ExprFuncBuilder::new(Some(ExprFuncKind::Window(*udwf)));
965965
builder.window_frame = Some(window_frame);
966966
builder
967967
}

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2420,19 +2420,24 @@ impl Window {
24202420
.iter()
24212421
.enumerate()
24222422
.filter_map(|(idx, expr)| {
2423-
if let Expr::WindowFunction(WindowFunction {
2423+
let Expr::WindowFunction(window_func) = expr else {
2424+
return None;
2425+
};
2426+
let WindowFunction {
24242427
fun: WindowFunctionDefinition::WindowUDF(udwf),
24252428
partition_by,
24262429
..
2427-
}) = expr
2428-
{
2429-
// When there is no PARTITION BY, row number will be unique
2430-
// across the entire table.
2431-
if udwf.name() == "row_number" && partition_by.is_empty() {
2432-
return Some(idx + input_len);
2433-
}
2430+
} = window_func.as_ref()
2431+
else {
2432+
return None;
2433+
};
2434+
// When there is no PARTITION BY, row number will be unique
2435+
// across the entire table.
2436+
if udwf.name() == "row_number" && partition_by.is_empty() {
2437+
return Some(idx + input_len);
2438+
} else {
2439+
None
24342440
}
2435-
None
24362441
})
24372442
.map(|idx| {
24382443
FunctionalDependence::new(vec![idx], vec![], false)

datafusion/expr/src/tree_node.rs

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,13 @@ impl TreeNode for Expr {
8989
(expr, when_then_expr, else_expr).apply_ref_elements(f),
9090
Expr::AggregateFunction(AggregateFunction { args, filter, order_by, .. }) =>
9191
(args, filter, order_by).apply_ref_elements(f),
92-
Expr::WindowFunction(WindowFunction {
93-
args,
94-
partition_by,
95-
order_by,
96-
..
97-
}) => {
92+
Expr::WindowFunction(window_func) => {
93+
let WindowFunction {
94+
args,
95+
partition_by,
96+
order_by,
97+
..
98+
} = window_func.as_ref();
9899
(args, partition_by, order_by).apply_ref_elements(f)
99100
}
100101
Expr::InList(InList { expr, list, .. }) => {
@@ -222,24 +223,28 @@ impl TreeNode for Expr {
222223
)))
223224
})?
224225
}
225-
Expr::WindowFunction(WindowFunction {
226-
args,
227-
fun,
228-
partition_by,
229-
order_by,
230-
window_frame,
231-
null_treatment,
232-
}) => (args, partition_by, order_by).map_elements(f)?.update_data(
233-
|(new_args, new_partition_by, new_order_by)| {
234-
Expr::WindowFunction(WindowFunction::new(fun, new_args))
235-
.partition_by(new_partition_by)
236-
.order_by(new_order_by)
237-
.window_frame(window_frame)
238-
.null_treatment(null_treatment)
239-
.build()
240-
.unwrap()
241-
},
242-
),
226+
Expr::WindowFunction(window_func) => {
227+
let WindowFunction {
228+
args,
229+
fun,
230+
partition_by,
231+
order_by,
232+
window_frame,
233+
null_treatment,
234+
} = *window_func;
235+
236+
(args, partition_by, order_by).map_elements(f)?.update_data(
237+
|(new_args, new_partition_by, new_order_by)| {
238+
Expr::from(WindowFunction::new(fun, new_args))
239+
.partition_by(new_partition_by)
240+
.order_by(new_order_by)
241+
.window_frame(window_frame)
242+
.null_treatment(null_treatment)
243+
.build()
244+
.unwrap()
245+
},
246+
)
247+
}
243248
Expr::AggregateFunction(AggregateFunction {
244249
args,
245250
func,

datafusion/expr/src/udwf.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ impl WindowUDF {
130130
pub fn call(&self, args: Vec<Expr>) -> Expr {
131131
let fun = crate::WindowFunctionDefinition::WindowUDF(Arc::new(self.clone()));
132132

133-
Expr::WindowFunction(WindowFunction::new(fun, args))
133+
Expr::from(WindowFunction::new(fun, args))
134134
}
135135

136136
/// Returns this function's name

datafusion/expr/src/utils.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,8 @@ pub fn group_window_expr_by_sort_keys(
588588
) -> Result<Vec<(WindowSortKey, Vec<Expr>)>> {
589589
let mut result = vec![];
590590
window_expr.into_iter().try_for_each(|expr| match &expr {
591-
Expr::WindowFunction( WindowFunction{ partition_by, order_by, .. }) => {
591+
Expr::WindowFunction( window_func) => {
592+
let WindowFunction{ partition_by, order_by, .. } = window_func.as_ref();
592593
let sort_key = generate_sort_key(partition_by, order_by)?;
593594
if let Some((_, values)) = result.iter_mut().find(
594595
|group: &&mut (WindowSortKey, Vec<Expr>)| matches!(group, (key, _) if *key == sort_key),
@@ -1439,19 +1440,19 @@ mod tests {
14391440

14401441
#[test]
14411442
fn test_group_window_expr_by_sort_keys_empty_window() -> Result<()> {
1442-
let max1 = Expr::WindowFunction(WindowFunction::new(
1443+
let max1 = Expr::from(WindowFunction::new(
14431444
WindowFunctionDefinition::AggregateUDF(max_udaf()),
14441445
vec![col("name")],
14451446
));
1446-
let max2 = Expr::WindowFunction(WindowFunction::new(
1447+
let max2 = Expr::from(WindowFunction::new(
14471448
WindowFunctionDefinition::AggregateUDF(max_udaf()),
14481449
vec![col("name")],
14491450
));
1450-
let min3 = Expr::WindowFunction(WindowFunction::new(
1451+
let min3 = Expr::from(WindowFunction::new(
14511452
WindowFunctionDefinition::AggregateUDF(min_udaf()),
14521453
vec![col("name")],
14531454
));
1454-
let sum4 = Expr::WindowFunction(WindowFunction::new(
1455+
let sum4 = Expr::from(WindowFunction::new(
14551456
WindowFunctionDefinition::AggregateUDF(sum_udaf()),
14561457
vec![col("age")],
14571458
));
@@ -1469,25 +1470,25 @@ mod tests {
14691470
let age_asc = Sort::new(col("age"), true, true);
14701471
let name_desc = Sort::new(col("name"), false, true);
14711472
let created_at_desc = Sort::new(col("created_at"), false, true);
1472-
let max1 = Expr::WindowFunction(WindowFunction::new(
1473+
let max1 = Expr::from(WindowFunction::new(
14731474
WindowFunctionDefinition::AggregateUDF(max_udaf()),
14741475
vec![col("name")],
14751476
))
14761477
.order_by(vec![age_asc.clone(), name_desc.clone()])
14771478
.build()
14781479
.unwrap();
1479-
let max2 = Expr::WindowFunction(WindowFunction::new(
1480+
let max2 = Expr::from(WindowFunction::new(
14801481
WindowFunctionDefinition::AggregateUDF(max_udaf()),
14811482
vec![col("name")],
14821483
));
1483-
let min3 = Expr::WindowFunction(WindowFunction::new(
1484+
let min3 = Expr::from(WindowFunction::new(
14841485
WindowFunctionDefinition::AggregateUDF(min_udaf()),
14851486
vec![col("name")],
14861487
))
14871488
.order_by(vec![age_asc.clone(), name_desc.clone()])
14881489
.build()
14891490
.unwrap();
1490-
let sum4 = Expr::WindowFunction(WindowFunction::new(
1491+
let sum4 = Expr::from(WindowFunction::new(
14911492
WindowFunctionDefinition::AggregateUDF(sum_udaf()),
14921493
vec![col("age")],
14931494
))

0 commit comments

Comments
 (0)