Skip to content

hack: don't coerce utf8 view inside dictionary #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ config_namespace! {
/// query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected
/// and recorded in the logical plan nodes.
pub collect_spans: bool, default = false

/// Specifies the recursion depth limit when parsing complex SQL Queries
pub recursion_limit: usize, default = 50
}
}

Expand Down
5 changes: 4 additions & 1 deletion datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,10 @@ impl SessionState {
)
})?;

let expr = DFParser::parse_sql_into_expr_with_dialect(sql, dialect.as_ref())?;
let mut parser = DFParser::new_with_dialect(sql, dialect.as_ref())?
.with_recursion_limit(self.config.options().sql_parser.recursion_limit);

let expr = parser.parse_expr()?;

Ok(expr)
}
Expand Down
10 changes: 9 additions & 1 deletion datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ impl<'a> BinaryTypeCoercer<'a> {
}
And | Or => if matches!((self.lhs, self.rhs), (Boolean | Null, Boolean | Null)) {
// Logical binary boolean operators can only be evaluated for
// boolean or null arguments.
// boolean or null arguments.
Ok(Signature::uniform(Boolean))
} else {
plan_err!(
Expand Down Expand Up @@ -502,6 +502,14 @@ pub fn type_union_resolution(data_types: &[DataType]) -> Option<DataType> {
}
}

// HACK: DataType::Utf8View is not a meaningful dictionary value type, so
// just pull this out if we dict-encoded it
if let Some(DataType::Dictionary(_, value_type)) = &candidate_type {
if value_type.as_ref() == &DataType::Utf8View {
candidate_type = Some(DataType::Utf8View);
}
}

candidate_type
}

Expand Down
7 changes: 7 additions & 0 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,13 @@ impl<'a> DFParser<'a> {
})
}

/// Specify the maximum recursion limit while parsing.
pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
self.parser = self.parser.with_recursion_limit(recursion_limit);
self
}


/// Parse a sql string into one or [`Statement`]s using the
/// [`GenericDialect`].
pub fn parse_sql(sql: &str) -> Result<VecDeque<Statement>, ParserError> {
Expand Down
4 changes: 1 addition & 3 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1617,9 +1617,7 @@ impl Unparser<'_> {
DataType::Union(_, _) => {
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
}
DataType::Dictionary(_, _) => {
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
}
DataType::Dictionary(_, val) => self.arrow_dtype_to_ast_dtype(val),
DataType::Decimal128(precision, scale)
| DataType::Decimal256(precision, scale) => {
let mut new_precision = *precision as u64;
Expand Down
Loading