Skip to content

Commit 7b370e2

Browse files
authored
support OR operator in binary evaluate_bounds (#15716)
* support OR operator in binary `evaluate_bounds` * fixup tests
1 parent a50a525 commit 7b370e2

File tree

5 files changed

+116
-9
lines changed

5 files changed

+116
-9
lines changed

datafusion-examples/examples/expr_api.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ fn boundary_analysis_in_conjuctions_demo() -> Result<()> {
424424
//
425425
// But `AND` conjunctions are easier to reason with because their interval
426426
// arithmetic follows naturally from set intersection operations, let us
427-
// now look at an example that is a tad more complicated `OR` conjunctions.
427+
// now look at an example that is a tad more complicated `OR` disjunctions.
428428

429429
// The expression we will look at is `age > 60 OR age <= 18`.
430430
let age_greater_than_60_less_than_18 =
@@ -435,7 +435,7 @@ fn boundary_analysis_in_conjuctions_demo() -> Result<()> {
435435
//
436436
// Initial range: [14, 79] as described in our column statistics.
437437
//
438-
// From the left-hand side and right-hand side of our `OR` conjunctions
438+
// From the left-hand side and right-hand side of our `OR` disjunctions
439439
// we end up with two ranges, instead of just one.
440440
//
441441
// - age > 60: [61, 79]
@@ -446,7 +446,8 @@ fn boundary_analysis_in_conjuctions_demo() -> Result<()> {
446446
let physical_expr = SessionContext::new()
447447
.create_physical_expr(age_greater_than_60_less_than_18, &df_schema)?;
448448

449-
// Since we don't handle interval arithmetic for `OR` operator this will error out.
449+
// However, analysis only supports a single interval, so we don't yet deal
450+
// with the multiple possibilities of the `OR` disjunctions.
450451
let analysis = analyze(
451452
&physical_expr,
452453
AnalysisContext::new(initial_boundaries),

datafusion/expr-common/src/interval_arithmetic.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,7 @@ impl Interval {
606606
upper: ScalarValue::Boolean(Some(upper)),
607607
})
608608
}
609-
_ => internal_err!("Incompatible data types for logical conjunction"),
609+
_ => internal_err!("Incompatible data types for logical disjunction"),
610610
}
611611
}
612612

@@ -971,6 +971,7 @@ pub fn apply_operator(op: &Operator, lhs: &Interval, rhs: &Interval) -> Result<I
971971
Operator::Lt => lhs.lt(rhs),
972972
Operator::LtEq => lhs.lt_eq(rhs),
973973
Operator::And => lhs.and(rhs),
974+
Operator::Or => lhs.or(rhs),
974975
Operator::Plus => lhs.add(rhs),
975976
Operator::Minus => lhs.sub(rhs),
976977
Operator::Multiply => lhs.mul(rhs),

datafusion/physical-expr/src/analysis.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ impl ExprBoundaries {
100100
) -> Result<Self> {
101101
let field = schema.fields().get(col_index).ok_or_else(|| {
102102
internal_datafusion_err!(
103-
"Could not create `ExprBoundaries`: in `try_from_column` `col_index`
103+
"Could not create `ExprBoundaries`: in `try_from_column` `col_index`
104104
has gone out of bounds with a value of {col_index}, the schema has {} columns.",
105105
schema.fields.len()
106106
)
@@ -425,7 +425,7 @@ mod tests {
425425
fn test_analyze_invalid_boundary_exprs() {
426426
let schema = Arc::new(Schema::new(vec![make_field("a", DataType::Int32)]));
427427
let expr = col("a").lt(lit(10)).or(col("a").gt(lit(20)));
428-
let expected_error = "Interval arithmetic does not support the operator OR";
428+
let expected_error = "OR operator cannot yet propagate true intervals";
429429
let boundaries = ExprBoundaries::try_new_unbounded(&schema).unwrap();
430430
let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap();
431431
let physical_expr =

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ impl PhysicalExpr for BinaryExpr {
516516
}
517517
} else if self.op.eq(&Operator::Or) {
518518
if interval.eq(&Interval::CERTAINLY_FALSE) {
519-
// A certainly false logical conjunction can only derive from certainly
519+
// A certainly false logical disjunction can only derive from certainly
520520
// false operands. Otherwise, we prove infeasibility.
521521
Ok((!left_interval.eq(&Interval::CERTAINLY_TRUE)
522522
&& !right_interval.eq(&Interval::CERTAINLY_TRUE))
@@ -5305,4 +5305,98 @@ mod tests {
53055305
assert!(matches!(result, ColumnarValue::Scalar(_)));
53065306
}
53075307
}
5308+
5309+
#[test]
5310+
fn test_evaluate_bounds_int32() {
5311+
let schema = Schema::new(vec![
5312+
Field::new("a", DataType::Int32, false),
5313+
Field::new("b", DataType::Int32, false),
5314+
]);
5315+
5316+
let a = Arc::new(Column::new("a", 0)) as _;
5317+
let b = Arc::new(Column::new("b", 1)) as _;
5318+
5319+
// Test addition bounds
5320+
let add_expr =
5321+
binary_expr(Arc::clone(&a), Operator::Plus, Arc::clone(&b), &schema).unwrap();
5322+
let add_bounds = add_expr
5323+
.evaluate_bounds(&[
5324+
&Interval::make(Some(1), Some(10)).unwrap(),
5325+
&Interval::make(Some(5), Some(15)).unwrap(),
5326+
])
5327+
.unwrap();
5328+
assert_eq!(add_bounds, Interval::make(Some(6), Some(25)).unwrap());
5329+
5330+
// Test subtraction bounds
5331+
let sub_expr =
5332+
binary_expr(Arc::clone(&a), Operator::Minus, Arc::clone(&b), &schema)
5333+
.unwrap();
5334+
let sub_bounds = sub_expr
5335+
.evaluate_bounds(&[
5336+
&Interval::make(Some(1), Some(10)).unwrap(),
5337+
&Interval::make(Some(5), Some(15)).unwrap(),
5338+
])
5339+
.unwrap();
5340+
assert_eq!(sub_bounds, Interval::make(Some(-14), Some(5)).unwrap());
5341+
5342+
// Test multiplication bounds
5343+
let mul_expr =
5344+
binary_expr(Arc::clone(&a), Operator::Multiply, Arc::clone(&b), &schema)
5345+
.unwrap();
5346+
let mul_bounds = mul_expr
5347+
.evaluate_bounds(&[
5348+
&Interval::make(Some(1), Some(10)).unwrap(),
5349+
&Interval::make(Some(5), Some(15)).unwrap(),
5350+
])
5351+
.unwrap();
5352+
assert_eq!(mul_bounds, Interval::make(Some(5), Some(150)).unwrap());
5353+
5354+
// Test division bounds
5355+
let div_expr =
5356+
binary_expr(Arc::clone(&a), Operator::Divide, Arc::clone(&b), &schema)
5357+
.unwrap();
5358+
let div_bounds = div_expr
5359+
.evaluate_bounds(&[
5360+
&Interval::make(Some(10), Some(20)).unwrap(),
5361+
&Interval::make(Some(2), Some(5)).unwrap(),
5362+
])
5363+
.unwrap();
5364+
assert_eq!(div_bounds, Interval::make(Some(2), Some(10)).unwrap());
5365+
}
5366+
5367+
#[test]
5368+
fn test_evaluate_bounds_bool() {
5369+
let schema = Schema::new(vec![
5370+
Field::new("a", DataType::Boolean, false),
5371+
Field::new("b", DataType::Boolean, false),
5372+
]);
5373+
5374+
let a = Arc::new(Column::new("a", 0)) as _;
5375+
let b = Arc::new(Column::new("b", 1)) as _;
5376+
5377+
// Test OR bounds
5378+
let or_expr =
5379+
binary_expr(Arc::clone(&a), Operator::Or, Arc::clone(&b), &schema).unwrap();
5380+
let or_bounds = or_expr
5381+
.evaluate_bounds(&[
5382+
&Interval::make(Some(true), Some(true)).unwrap(),
5383+
&Interval::make(Some(false), Some(false)).unwrap(),
5384+
])
5385+
.unwrap();
5386+
assert_eq!(or_bounds, Interval::make(Some(true), Some(true)).unwrap());
5387+
5388+
// Test AND bounds
5389+
let and_expr =
5390+
binary_expr(Arc::clone(&a), Operator::And, Arc::clone(&b), &schema).unwrap();
5391+
let and_bounds = and_expr
5392+
.evaluate_bounds(&[
5393+
&Interval::make(Some(true), Some(true)).unwrap(),
5394+
&Interval::make(Some(false), Some(false)).unwrap(),
5395+
])
5396+
.unwrap();
5397+
assert_eq!(
5398+
and_bounds,
5399+
Interval::make(Some(false), Some(false)).unwrap()
5400+
);
5401+
}
53085402
}

datafusion/physical-expr/src/intervals/cp_solver.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,12 @@ use std::sync::Arc;
148148
use super::utils::{
149149
convert_duration_type_to_interval, convert_interval_type_to_duration, get_inverse_op,
150150
};
151-
use crate::expressions::Literal;
151+
use crate::expressions::{BinaryExpr, Literal};
152152
use crate::utils::{build_dag, ExprTreeNode};
153153
use crate::PhysicalExpr;
154154

155155
use arrow::datatypes::{DataType, Schema};
156-
use datafusion_common::{internal_err, Result};
156+
use datafusion_common::{internal_err, not_impl_err, Result};
157157
use datafusion_expr::interval_arithmetic::{apply_operator, satisfy_greater, Interval};
158158
use datafusion_expr::Operator;
159159

@@ -645,6 +645,17 @@ impl ExprIntervalGraph {
645645
.map(|child| self.graph[*child].interval())
646646
.collect::<Vec<_>>();
647647
let node_interval = self.graph[node].interval();
648+
// Special case: true OR could in principle be propagated by 3 interval sets,
649+
// (i.e. left true, or right true, or both true) however we do not support this yet.
650+
if node_interval == &Interval::CERTAINLY_TRUE
651+
&& self.graph[node]
652+
.expr
653+
.as_any()
654+
.downcast_ref::<BinaryExpr>()
655+
.is_some_and(|expr| expr.op() == &Operator::Or)
656+
{
657+
return not_impl_err!("OR operator cannot yet propagate true intervals");
658+
}
648659
let propagated_intervals = self.graph[node]
649660
.expr
650661
.propagate_constraints(node_interval, &children_intervals)?;

0 commit comments

Comments
 (0)