diff --git a/Cargo.toml b/Cargo.toml index 301a59c55..8ff0ceb55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,8 +37,9 @@ name = "sqlparser" path = "src/lib.rs" [features] -default = ["std"] +default = ["std", "recursive-protection"] std = [] +recursive-protection = ["std", "recursive"] # Enable JSON output in the `cli` example: json_example = ["serde_json", "serde"] visitor = ["sqlparser_derive"] @@ -46,6 +47,8 @@ visitor = ["sqlparser_derive"] [dependencies] bigdecimal = { version = "0.4.1", features = ["serde"], optional = true } log = "0.4" +recursive = { version = "0.1.1", optional = true} + serde = { version = "1.0", features = ["derive"], optional = true } # serde_json is only used in examples/cli, but we have to put it outside # of dev-dependencies because of diff --git a/README.md b/README.md index fd676d115..997aec585 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ The following optional [crate features](https://doc.rust-lang.org/cargo/referen * `serde`: Adds [Serde](https://serde.rs/) support by implementing `Serialize` and `Deserialize` for all AST nodes. * `visitor`: Adds a `Visitor` capable of recursively walking the AST tree. - +* `recursive-protection` (enabled by default), uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection. ## Syntax vs Semantics diff --git a/derive/src/lib.rs b/derive/src/lib.rs index b81623312..08c5c5db4 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -78,7 +78,10 @@ fn derive_visit(input: proc_macro::TokenStream, visit_type: &VisitType) -> proc_ let expanded = quote! { // The generated impl. + // Note that it uses [`recursive::recursive`] to protect from stack overflow. + // See tests in https://github.com/apache/datafusion-sqlparser-rs/pull/1522/ for more info. impl #impl_generics sqlparser::ast::#visit_trait for #name #ty_generics #where_clause { + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] fn visit( &#modifier self, visitor: &mut V diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index 32a6da1bc..74cac5c97 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -42,6 +42,46 @@ fn basic_queries(c: &mut Criterion) { group.bench_function("sqlparser::with_select", |b| { b.iter(|| Parser::parse_sql(&dialect, with_query).unwrap()); }); + + let large_statement = { + let expressions = (0..1000) + .map(|n| format!("FN_{}(COL_{})", n, n)) + .collect::>() + .join(", "); + let tables = (0..1000) + .map(|n| format!("TABLE_{}", n)) + .collect::>() + .join(" JOIN "); + let where_condition = (0..1000) + .map(|n| format!("COL_{} = {}", n, n)) + .collect::>() + .join(" OR "); + let order_condition = (0..1000) + .map(|n| format!("COL_{} DESC", n)) + .collect::>() + .join(", "); + + format!( + "SELECT {} FROM {} WHERE {} ORDER BY {}", + expressions, tables, where_condition, order_condition + ) + }; + + group.bench_function("parse_large_statement", |b| { + b.iter(|| Parser::parse_sql(&dialect, criterion::black_box(large_statement.as_str()))); + }); + + let large_statement = Parser::parse_sql(&dialect, large_statement.as_str()) + .unwrap() + .pop() + .unwrap(); + + group.bench_function("format_large_statement", |b| { + b.iter(|| { + let formatted_query = large_statement.to_string(); + assert_eq!(formatted_query, large_statement); + }); + }); } criterion_group!(benches, basic_queries); diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6e3f20472..4a5e1e44f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1291,6 +1291,7 @@ impl fmt::Display for CastFormat { } impl fmt::Display for Expr { + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Expr::Identifier(s) => write!(f, "{s}"), diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index f7562b66c..c824ad2f3 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -894,4 +894,29 @@ mod tests { assert_eq!(actual, expected) } } + + struct QuickVisitor; // [`TestVisitor`] is too slow to iterate over thousands of nodes + + impl Visitor for QuickVisitor { + type Break = (); + } + + #[test] + fn overflow() { + let cond = (0..1000) + .map(|n| format!("X = {}", n)) + .collect::>() + .join(" OR "); + let sql = format!("SELECT x where {0}", cond); + + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, sql.as_str()).tokenize().unwrap(); + let s = Parser::new(&dialect) + .with_tokens(tokens) + .parse_statement() + .unwrap(); + + let mut visitor = QuickVisitor {}; + s.visit(&mut visitor); + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 94d63cf80..c7cde5742 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -73,6 +73,9 @@ mod recursion { /// Note: Uses an [`std::rc::Rc`] and [`std::cell::Cell`] in order to satisfy the Rust /// borrow checker so the automatic [`DepthGuard`] decrement a /// reference to the counter. + /// + /// Note: when "recursive-protection" feature is enabled, this crate uses additional stack overflow protection + /// for some of its recursive methods. See [`recursive::recursive`] for more information. pub(crate) struct RecursionCounter { remaining_depth: Rc>, } @@ -326,6 +329,9 @@ impl<'a> Parser<'a> { /// # Ok(()) /// # } /// ``` + /// + /// Note: when "recursive-protection" feature is enabled, this crate uses additional stack overflow protection + // for some of its recursive methods. See [`recursive::recursive`] for more information. pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self { self.recursion_counter = RecursionCounter::new(recursion_limit); self diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1bf9383af..4d3b11942 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -12426,3 +12426,16 @@ fn test_table_sample() { dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50)"); dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); } + +#[test] +fn overflow() { + let expr = std::iter::repeat("1") + .take(1000) + .collect::>() + .join(" + "); + let sql = format!("SELECT {}", expr); + + let mut statements = Parser::parse_sql(&GenericDialect {}, sql.as_str()).unwrap(); + let statement = statements.pop().unwrap(); + assert_eq!(statement.to_string(), sql); +}