diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 5e8317c081d9..1a4a6068a3ad 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -256,6 +256,9 @@ config_namespace! { /// query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected /// and recorded in the logical plan nodes. pub collect_spans: bool, default = false + + /// Specifies the recursion depth limit when parsing complex SQL Queries + pub recursion_limit: usize, default = 50 } } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index f1abf30c0c54..fb90dda25414 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -483,12 +483,21 @@ impl SessionState { MsSQL, ClickHouse, BigQuery, Ansi." ) })?; - let mut statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?; + + let recursion_limit = self.config.options().sql_parser.recursion_limit; + + let mut statements = DFParser::parse_sql_with_dialect_limit( + sql, + dialect.as_ref(), + recursion_limit, + )?; + if statements.len() > 1 { return not_impl_err!( "The context currently only supports a single SQL statement" ); } + let statement = statements.pop_front().ok_or_else(|| { plan_datafusion_err!("No SQL statements were provided in the query string") })?; @@ -522,7 +531,12 @@ impl SessionState { ) })?; - let expr = DFParser::parse_sql_into_expr_with_dialect(sql, dialect.as_ref())?; + let recursion_limit = self.config.options().sql_parser.recursion_limit; + let expr = DFParser::parse_sql_into_expr_with_dialect_limit( + sql, + dialect.as_ref(), + recursion_limit, + )?; Ok(expr) } diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 9725166b8ae0..c167e211f0d7 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -257,6 +257,9 @@ fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { Ok(()) } 
+/// Same as `sqlparser` +const DEFAULT_RECURSION_LIMIT: usize = 50; + /// DataFusion SQL Parser based on [`sqlparser`] /// /// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`]. @@ -282,12 +285,23 @@ impl<'a> DFParser<'a> { pub fn new_with_dialect( sql: &str, dialect: &'a dyn Dialect, + ) -> Result { + DFParser::new_with_dialect_limit(sql, dialect, DEFAULT_RECURSION_LIMIT) + } + /// Create a new parser for the specified tokens with the + /// specified dialect and recursion limit + pub fn new_with_dialect_limit( + sql: &str, + dialect: &'a dyn Dialect, + recursion_limit: usize, ) -> Result { let mut tokenizer = Tokenizer::new(dialect, sql); let tokens = tokenizer.tokenize_with_location()?; Ok(DFParser { - parser: Parser::new(dialect).with_tokens_with_locations(tokens), + parser: Parser::new(dialect) + .with_tokens_with_locations(tokens) + .with_recursion_limit(recursion_limit), }) } @@ -295,7 +309,7 @@ impl<'a> DFParser<'a> { /// [`GenericDialect`]. pub fn parse_sql(sql: &str) -> Result, ParserError> { let dialect = &GenericDialect {}; - DFParser::parse_sql_with_dialect(sql, dialect) + DFParser::parse_sql_with_dialect_limit(sql, dialect, DEFAULT_RECURSION_LIMIT) } /// Parse a SQL string and produce one or more [`Statement`]s with @@ -304,7 +318,17 @@ impl<'a> DFParser<'a> { sql: &str, dialect: &dyn Dialect, ) -> Result, ParserError> { - let mut parser = DFParser::new_with_dialect(sql, dialect)?; + DFParser::parse_sql_with_dialect_limit(sql, dialect, DEFAULT_RECURSION_LIMIT) + } + + /// Parse a SQL string and produce one or more [`Statement`]s with + /// the specified dialect and recursion limit + pub fn parse_sql_with_dialect_limit( + sql: &str, + dialect: &dyn Dialect, + recursion_limit: usize, + ) -> Result, ParserError> { + let mut parser = DFParser::new_with_dialect_limit(sql, dialect, recursion_limit)?; let mut stmts = VecDeque::new(); let mut expecting_statement_delimiter = false; loop { @@ -331,7 +355,19 @@ impl<'a> 
DFParser<'a> { sql: &str, dialect: &dyn Dialect, ) -> Result { - let mut parser = DFParser::new_with_dialect(sql, dialect)?; + DFParser::parse_sql_into_expr_with_dialect_limit( + sql, + dialect, + DEFAULT_RECURSION_LIMIT, + ) + } + + pub fn parse_sql_into_expr_with_dialect_limit( + sql: &str, + dialect: &dyn Dialect, + recursion_limit: usize, + ) -> Result { + let mut parser = DFParser::new_with_dialect_limit(sql, dialect, recursion_limit)?; parser.parse_expr() } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 7c56969d47cd..87f78d661f51 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1624,9 +1624,7 @@ impl Unparser<'_> { DataType::Union(_, _) => { not_impl_err!("Unsupported DataType: conversion: {data_type:?}") } - DataType::Dictionary(_, _) => { - not_impl_err!("Unsupported DataType: conversion: {data_type:?}") - } + DataType::Dictionary(_, val) => self.arrow_dtype_to_ast_dtype(val), DataType::Decimal128(precision, scale) | DataType::Decimal256(precision, scale) => { let mut new_precision = *precision as u64; diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 5a1caad46732..f1f2785b370e 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -262,6 +262,7 @@ datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true datafusion.sql_parser.enable_options_value_normalization false datafusion.sql_parser.parse_float_as_decimal false +datafusion.sql_parser.recursion_limit 50 datafusion.sql_parser.support_varchar_with_length true # show all variables with verbose @@ -357,6 +358,7 @@ datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusi datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to 
lowercase when not quoted) datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type +datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. # show_variable_in_config_options diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 999735f4c059..0ee136387d38 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -127,3 +127,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. | | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. | | datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. 
| +| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |