diff --git a/crates/pgt_lexer/src/lexed.rs b/crates/pgt_lexer/src/lexed.rs index 08286bbfc..2e46d7840 100644 --- a/crates/pgt_lexer/src/lexed.rs +++ b/crates/pgt_lexer/src/lexed.rs @@ -23,6 +23,7 @@ pub struct LexDiagnostic { } /// Result of lexing a string, providing access to tokens and diagnostics +#[derive(Debug)] pub struct Lexed<'a> { pub(crate) text: &'a str, pub(crate) kind: Vec, diff --git a/crates/pgt_statement_splitter/src/lib.rs b/crates/pgt_statement_splitter/src/lib.rs index 03fe8fd40..94b87534b 100644 --- a/crates/pgt_statement_splitter/src/lib.rs +++ b/crates/pgt_statement_splitter/src/lib.rs @@ -19,7 +19,7 @@ pub fn split(sql: &str) -> SplitResult { let mut splitter = Splitter::new(&lexed); - source(&mut splitter); + let _ = source(&mut splitter); let split_result = splitter.finish(); @@ -587,4 +587,10 @@ VALUES TextRange::new(0.into(), 6.into()), )]); } + + #[test] + fn does_not_panic_on_incomplete_statements() { + // does not panic + let _ = Tester::from("select case "); + } } diff --git a/crates/pgt_statement_splitter/src/splitter.rs b/crates/pgt_statement_splitter/src/splitter.rs index 2905a3e7d..2ce382085 100644 --- a/crates/pgt_statement_splitter/src/splitter.rs +++ b/crates/pgt_statement_splitter/src/splitter.rs @@ -8,6 +8,8 @@ pub use common::source; use pgt_lexer::{Lexed, SyntaxKind}; use pgt_text_size::TextRange; +use crate::splitter::common::{ReachedEOFException, SplitterResult}; + pub struct SplitResult { pub ranges: Vec, pub errors: Vec, @@ -29,6 +31,7 @@ pub struct SplitError { pub token: usize, } +#[derive(Debug)] pub struct Splitter<'a> { lexed: &'a Lexed<'a>, current_pos: usize, @@ -102,12 +105,12 @@ impl<'a> Splitter<'a> { self.lexed.kind(self.current_pos) } - fn eat(&mut self, kind: SyntaxKind) -> bool { + fn eat(&mut self, kind: SyntaxKind) -> Result { if self.current() == kind { - self.advance(); - true + self.advance()?; + Ok(true) } else { - false + Ok(false) } } @@ -118,13 +121,17 @@ impl<'a> Splitter<'a> { /// Advances the parser to the next relevant token and returns it. /// /// NOTE: This will skip trivia tokens. - fn advance(&mut self) -> SyntaxKind { + fn advance(&mut self) -> Result { + if self.current() == SyntaxKind::EOF { + return Err(ReachedEOFException); + } + let pos = (self.current_pos + 1..self.lexed.len()) .find(|&idx| !self.is_trivia(idx)) - .expect("lexed should have non-trivia eof token"); + .unwrap(); self.current_pos = pos; - self.lexed.kind(pos) + Ok(self.lexed.kind(pos)) } fn look_ahead(&self, ignore_trivia: bool) -> SyntaxKind { @@ -164,9 +171,9 @@ impl<'a> Splitter<'a> { /// Will advance if the `kind` matches the current token. /// Otherwise, will add a diagnostic to the internal `errors`. - fn expect(&mut self, kind: SyntaxKind) { + fn expect(&mut self, kind: SyntaxKind) -> SplitterResult { if self.current() == kind { - self.advance(); + self.advance()?; } else { let token = if self.current() == SyntaxKind::EOF { self.current_pos - 1 @@ -178,6 +185,8 @@ impl<'a> Splitter<'a> { msg: format!("Expected {:#?}", kind), token, }); - } + }; + + Ok(()) } } diff --git a/crates/pgt_statement_splitter/src/splitter/common.rs b/crates/pgt_statement_splitter/src/splitter/common.rs index 9c6064b78..87b26ec29 100644 --- a/crates/pgt_statement_splitter/src/splitter/common.rs +++ b/crates/pgt_statement_splitter/src/splitter/common.rs @@ -1,3 +1,5 @@ +use std::error::Error; + use super::TRIVIA_TOKENS; use pgt_lexer::SyntaxKind; @@ -8,70 +10,76 @@ use super::{ dml::{cte, delete, insert, select, update}, }; -pub fn source(p: &mut Splitter) { +#[derive(Debug)] +pub struct ReachedEOFException; + +impl std::fmt::Display for ReachedEOFException { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "ReachedEOFException") + } +} + +impl Error for ReachedEOFException {} + +pub(crate) type SplitterResult = std::result::Result<(), ReachedEOFException>; + +pub fn source(p: &mut Splitter) -> SplitterResult { loop { match p.current() { SyntaxKind::EOF => { break; } kind if TRIVIA_TOKENS.contains(&kind) || kind == SyntaxKind::LINE_ENDING => { - p.advance(); + p.advance()?; } SyntaxKind::BACKSLASH => { - plpgsql_command(p); + plpgsql_command(p)?; } _ => { - statement(p); + statement(p)?; } } } + + Ok(()) } -pub(crate) fn statement(p: &mut Splitter) { +pub(crate) fn statement(p: &mut Splitter) -> SplitterResult { p.start_stmt(); - match p.current() { - SyntaxKind::WITH_KW => { - cte(p); - } - SyntaxKind::SELECT_KW => { - select(p); - } - SyntaxKind::INSERT_KW => { - insert(p); - } - SyntaxKind::UPDATE_KW => { - update(p); - } - SyntaxKind::DELETE_KW => { - delete(p); - } - SyntaxKind::CREATE_KW => { - create(p); - } - SyntaxKind::ALTER_KW => { - alter(p); - } - _ => { - unknown(p, &[]); - } - } + + // Currently, Err means that we reached EOF. + // Regardless of whether we reach EOF or we complete the statement, we want to close it. + // We might want to handle other kinds of errors differently in the future. + let _ = match p.current() { + SyntaxKind::WITH_KW => cte(p), + SyntaxKind::SELECT_KW => select(p), + SyntaxKind::INSERT_KW => insert(p), + SyntaxKind::UPDATE_KW => update(p), + SyntaxKind::DELETE_KW => delete(p), + SyntaxKind::CREATE_KW => create(p), + SyntaxKind::ALTER_KW => alter(p), + _ => unknown(p, &[]), + }; + p.close_stmt(); + + Ok(()) } -pub(crate) fn begin_end(p: &mut Splitter) { - p.expect(SyntaxKind::BEGIN_KW); +pub(crate) fn begin_end(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::BEGIN_KW)?; let mut depth = 1; loop { match p.current() { SyntaxKind::BEGIN_KW => { - p.advance(); + p.advance()?; depth += 1; } - SyntaxKind::END_KW | SyntaxKind::EOF => { + SyntaxKind::END_KW => { if p.current() == SyntaxKind::END_KW { - p.advance(); + p.advance()?; } depth -= 1; if depth == 0 { @@ -79,26 +87,28 @@ pub(crate) fn begin_end(p: &mut Splitter) { } } _ => { - p.advance(); + p.advance()?; } } } + + Ok(()) } -pub(crate) fn parenthesis(p: &mut Splitter) { - p.expect(SyntaxKind::L_PAREN); +pub(crate) fn parenthesis(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::L_PAREN)?; let mut depth = 1; loop { match p.current() { SyntaxKind::L_PAREN => { - p.advance(); + p.advance()?; depth += 1; } - SyntaxKind::R_PAREN | SyntaxKind::EOF => { + SyntaxKind::R_PAREN => { if p.current() == SyntaxKind::R_PAREN { - p.advance(); + p.advance()?; } depth -= 1; if depth == 0 { @@ -106,19 +116,21 @@ pub(crate) fn parenthesis(p: &mut Splitter) { } } _ => { - p.advance(); + p.advance()?; } } } + + Ok(()) } -pub(crate) fn plpgsql_command(p: &mut Splitter) { - p.expect(SyntaxKind::BACKSLASH); +pub(crate) fn plpgsql_command(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::BACKSLASH)?; loop { match p.current() { SyntaxKind::LINE_ENDING => { - p.advance(); + p.advance()?; break; } _ => { @@ -128,43 +140,44 @@ pub(crate) fn plpgsql_command(p: &mut Splitter) { } } } + + Ok(()) } -pub(crate) fn case(p: &mut Splitter) { - p.expect(SyntaxKind::CASE_KW); +pub(crate) fn case(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::CASE_KW)?; loop { match p.current() { SyntaxKind::END_KW => { - p.advance(); + p.advance()?; break; } _ => { - p.advance(); + p.advance()?; } } } + + Ok(()) } -pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { +pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) -> SplitterResult { loop { match p.current() { SyntaxKind::SEMICOLON => { - p.advance(); - break; - } - SyntaxKind::EOF => { + p.advance()?; break; } SyntaxKind::LINE_ENDING => { if p.look_back(true).is_some_and(|t| t == SyntaxKind::COMMA) { - p.advance(); + p.advance()?; } else { break; } } SyntaxKind::CASE_KW => { - case(p); + case(p)?; } SyntaxKind::BACKSLASH => { // pgsql commands @@ -185,17 +198,17 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { { break; } - p.advance(); + p.advance()?; } SyntaxKind::L_PAREN => { - parenthesis(p); + parenthesis(p)?; } SyntaxKind::BEGIN_KW => { if p.look_ahead(true) != SyntaxKind::SEMICOLON { // BEGIN; should be treated as a statement terminator - begin_end(p); + begin_end(p)?; } else { - p.advance(); + p.advance()?; } } t => match at_statement_start(t, exclude) { @@ -232,7 +245,7 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { break; } - p.advance(); + p.advance()?; } Some(SyntaxKind::INSERT_KW) | Some(SyntaxKind::UPDATE_KW) @@ -271,7 +284,7 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { { break; } - p.advance(); + p.advance()?; } Some(SyntaxKind::WITH_KW) => { let next = p.look_ahead(true); @@ -292,7 +305,7 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { { break; } - p.advance(); + p.advance()?; } Some(SyntaxKind::CREATE_KW) => { let prev = p.look_back(true); @@ -309,15 +322,16 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { break; } - p.advance(); + p.advance()?; } Some(_) => { break; } None => { - p.advance(); + p.advance()?; } }, } } + Ok(()) } diff --git a/crates/pgt_statement_splitter/src/splitter/ddl.rs b/crates/pgt_statement_splitter/src/splitter/ddl.rs index 449288aab..c301d6a0b 100644 --- a/crates/pgt_statement_splitter/src/splitter/ddl.rs +++ b/crates/pgt_statement_splitter/src/splitter/ddl.rs @@ -1,15 +1,17 @@ use pgt_lexer::SyntaxKind; +use crate::splitter::common::SplitterResult; + use super::{Splitter, common::unknown}; -pub(crate) fn create(p: &mut Splitter) { - p.expect(SyntaxKind::CREATE_KW); +pub(crate) fn create(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::CREATE_KW)?; - unknown(p, &[SyntaxKind::WITH_KW]); + unknown(p, &[SyntaxKind::WITH_KW]) } -pub(crate) fn alter(p: &mut Splitter) { - p.expect(SyntaxKind::ALTER_KW); +pub(crate) fn alter(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::ALTER_KW)?; - unknown(p, &[SyntaxKind::ALTER_KW]); + unknown(p, &[SyntaxKind::ALTER_KW]) } diff --git a/crates/pgt_statement_splitter/src/splitter/dml.rs b/crates/pgt_statement_splitter/src/splitter/dml.rs index acfbebfc9..ea80f0bec 100644 --- a/crates/pgt_statement_splitter/src/splitter/dml.rs +++ b/crates/pgt_statement_splitter/src/splitter/dml.rs @@ -1,21 +1,23 @@ use pgt_lexer::SyntaxKind; +use crate::splitter::common::SplitterResult; + use super::{ Splitter, common::{parenthesis, unknown}, }; -pub(crate) fn cte(p: &mut Splitter) { - p.expect(SyntaxKind::WITH_KW); - p.eat(SyntaxKind::RECURSIVE_KW); +pub(crate) fn cte(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::WITH_KW)?; + p.eat(SyntaxKind::RECURSIVE_KW)?; loop { - p.expect(SyntaxKind::IDENT); - p.expect(SyntaxKind::AS_KW); - parenthesis(p); + p.expect(SyntaxKind::IDENT)?; + p.expect(SyntaxKind::AS_KW)?; + parenthesis(p)?; if p.current() == SyntaxKind::COMMA { - p.advance(); + p.advance()?; } else { break; } @@ -30,31 +32,32 @@ pub(crate) fn cte(p: &mut Splitter) { SyntaxKind::DELETE_KW, SyntaxKind::MERGE_KW, ], - ); + )?; + Ok(()) } -pub(crate) fn select(p: &mut Splitter) { - p.expect(SyntaxKind::SELECT_KW); +pub(crate) fn select(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::SELECT_KW)?; - unknown(p, &[]); + unknown(p, &[]) } -pub(crate) fn insert(p: &mut Splitter) { - p.expect(SyntaxKind::INSERT_KW); - p.expect(SyntaxKind::INTO_KW); +pub(crate) fn insert(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::INSERT_KW)?; + p.expect(SyntaxKind::INTO_KW)?; - unknown(p, &[SyntaxKind::SELECT_KW]); + unknown(p, &[SyntaxKind::SELECT_KW]) } -pub(crate) fn update(p: &mut Splitter) { - p.expect(SyntaxKind::UPDATE_KW); +pub(crate) fn update(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::UPDATE_KW)?; - unknown(p, &[]); + unknown(p, &[]) } -pub(crate) fn delete(p: &mut Splitter) { - p.expect(SyntaxKind::DELETE_KW); - p.expect(SyntaxKind::FROM_KW); +pub(crate) fn delete(p: &mut Splitter) -> SplitterResult { + p.expect(SyntaxKind::DELETE_KW)?; + p.expect(SyntaxKind::FROM_KW)?; - unknown(p, &[]); + unknown(p, &[]) }