diff --git a/crates/pgt_statement_splitter/src/lib.rs b/crates/pgt_statement_splitter/src/lib.rs
index 06440da1..3fa67213 100644
--- a/crates/pgt_statement_splitter/src/lib.rs
+++ b/crates/pgt_statement_splitter/src/lib.rs
@@ -136,6 +136,30 @@ mod tests {
         )]);
     }
 
+    #[test]
+    fn command_between_not_starting() {
+        Tester::from("select 1\n \\com test\nselect 2")
+            .expect_statements(vec!["select 1", "select 2"]);
+    }
+
+    #[test]
+    fn command_between() {
+        Tester::from("select 1\n\\com test\nselect 2")
+            .expect_statements(vec!["select 1", "select 2"]);
+    }
+
+    #[test]
+    fn command_standalone() {
+        Tester::from("select 1\n\n\\com test\n\nselect 2")
+            .expect_statements(vec!["select 1", "select 2"]);
+    }
+
+    #[test]
+    fn command_at_end_of_input() {
+        Tester::from("select 1\n\\com test")
+            .expect_statements(vec!["select 1"]);
+    }
+
     #[test]
     fn insert_with_select() {
         Tester::from("\ninsert into tbl (id) select 1\n\nselect 3")
diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs
index c94fe245..183abd97 100644
--- a/crates/pgt_statement_splitter/src/parser.rs
+++ b/crates/pgt_statement_splitter/src/parser.rs
@@ -113,6 +113,9 @@ impl Parser {
         }
     }
 
+    /// Advances the parser to the next relevant token and returns it.
+    ///
+    /// NOTE: This will skip irrelevant tokens.
     fn advance(&mut self) -> &Token {
         // can't reuse any `find_next_relevant` logic because of Mr. Borrow Checker
         let (pos, token) = self
diff --git a/crates/pgt_statement_splitter/src/parser/common.rs b/crates/pgt_statement_splitter/src/parser/common.rs
index 1a355f08..a353791b 100644
--- a/crates/pgt_statement_splitter/src/parser/common.rs
+++ b/crates/pgt_statement_splitter/src/parser/common.rs
@@ -1,4 +1,4 @@
-use pgt_lexer::{SyntaxKind, Token, TokenType};
+use pgt_lexer::{SyntaxKind, Token, TokenType, WHITESPACE_TOKENS};
 
 use super::{
     Parser,
@@ -24,6 +24,12 @@ pub fn source(p: &mut Parser) {
             } => {
                 p.advance();
             }
+            Token {
+                kind: SyntaxKind::Ascii92,
+                ..
+            } => {
+                plpgsql_command(p);
+            }
             _ => {
                 statement(p);
             }
@@ -87,6 +93,25 @@ pub(crate) fn parenthesis(p: &mut Parser) {
         }
     }
 }
 
+/// Skips a backslash command such as `\if test`: everything from the `\` up to and
+/// including the newline that ends the line is consumed without opening a statement.
+///
+/// The scan also stops once the token list is exhausted, because a command on the
+/// last line of the input is not followed by a newline token.
+pub(crate) fn plpgsql_command(p: &mut Parser) {
+    p.expect(SyntaxKind::Ascii92);
+
+    // step through the raw token list instead of calling `advance()`:
+    // `advance()` ignores irrelevant tokens and would skip the newline we are looking for
+    while p.current_pos < p.tokens.len() {
+        if p.current().kind == SyntaxKind::Newline {
+            p.advance();
+            break;
+        }
+        p.current_pos += 1;
+    }
+}
+
 pub(crate) fn case(p: &mut Parser) {
     p.expect(SyntaxKind::Case);
@@ -125,6 +150,36 @@ pub(crate) fn unknown(p: &mut Parser, exclude: &[SyntaxKind]) {
             } => {
                 case(p);
             }
+            Token {
+                kind: SyntaxKind::Ascii92,
+                ..
+            } => {
+                // psql meta-commands, e.g.
+                //
+                // ```
+                // \if test
+                // ```
+                //
+                // a "\" only starts a command if it is the first thing on its line, so
+                // we walk back from the current position over the raw tokens and look at
+                // the first one that is either a newline or not whitespace at all
+                // (newline is a whitespace token, but it is the one we are looking for)
+                let starts_line = p
+                    .tokens
+                    .iter()
+                    .take(p.current_pos)
+                    .rev()
+                    .find(|t| {
+                        t.kind == SyntaxKind::Newline
+                            || !WHITESPACE_TOKENS.iter().any(|ws| *ws == t.kind)
+                    })
+                    .is_some_and(|t| t.kind == SyntaxKind::Newline);
+
+                if starts_line {
+                    break;
+                }
+                p.advance();
+            }
             Token {
                 kind: SyntaxKind::Ascii40,
                 ..