Skip to content

Commit 3d43763

Browse files
committed
Support 2way
1 parent e1490cd commit 3d43763

File tree

13 files changed

+618
-52
lines changed

13 files changed

+618
-52
lines changed

crates/postgresql-cst-parser/src/cst.rs

+150-48
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,11 @@
1+
pub(crate) mod extra;
2+
pub(crate) mod lr_parse_state;
3+
4+
pub(crate) use extra::*;
5+
pub(crate) use lr_parse_state::*;
6+
7+
use std::collections::HashSet;
8+
19
use cstree::{
210
build::GreenNodeBuilder, green::GreenNode, interning::Resolver, RawSyntaxKind, Syntax,
311
};
@@ -10,18 +18,25 @@ use crate::{
1018
rule_name_to_component_id, token_kind_to_component_id, Action, ACTION_TABLE, GOTO_TABLE,
1119
RULES,
1220
},
21+
transform::{ParseTransform, ParseTransformer},
1322
};
1423

1524
use super::{lexer::Token, syntax_kind::SyntaxKind};
1625

17-
struct Node {
26+
pub(crate) struct Node {
1827
token: Option<Token>,
19-
component_id: u32,
28+
pub component_id: u32,
2029
children: Vec<Node>,
2130
start_byte_pos: usize,
2231
end_byte_pos: usize,
2332
}
2433

34+
impl From<&Node> for SyntaxKind {
35+
fn from(value: &Node) -> Self {
36+
SyntaxKind::from_raw(RawSyntaxKind(value.component_id))
37+
}
38+
}
39+
2540
pub type PostgreSQLSyntax = SyntaxKind;
2641

2742
impl From<SyntaxKind> for cstree::RawSyntaxKind {
@@ -55,21 +70,30 @@ impl Parser {
5570
fn parse_rec(
5671
&mut self,
5772
node: &Node,
58-
peekable: &mut std::iter::Peekable<std::vec::IntoIter<(SyntaxKind, usize, usize, &str)>>,
73+
peekable: &mut std::iter::Peekable<std::vec::IntoIter<Extra>>,
74+
complement_token: &HashSet<usize>,
5975
) {
6076
if cfg!(feature = "remove-empty-node") {
61-
if node.start_byte_pos == node.end_byte_pos {
77+
if node.start_byte_pos == node.end_byte_pos
78+
&& !complement_token.contains(&node.start_byte_pos)
79+
{
6280
return;
6381
}
6482
}
6583

66-
while let Some((kind, start, _, text)) = peekable.peek() {
84+
while let Some(Extra {
85+
kind,
86+
start_byte_pos,
87+
comment,
88+
..
89+
}) = peekable.peek()
90+
{
6791
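// TODO: Decide between `>=` and `>` in the comparison below: including the equals sign changes whether an extra (whitespace/comment) that starts exactly at this node's start position is emitted before the node or left for later. Determine which placement is preferable.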
68-
if *start >= node.start_byte_pos {
92+
if *start_byte_pos >= node.start_byte_pos {
6993
// if *start > node.start_byte_pos {
7094
break;
7195
}
72-
self.builder.token(*kind, text);
96+
self.builder.token(*kind, &comment);
7397
peekable.next();
7498
}
7599

@@ -80,26 +104,27 @@ impl Parser {
80104
self.builder.start_node(kind);
81105
node.children
82106
.iter()
83-
.for_each(|c| self.parse_rec(c, peekable));
107+
.for_each(|c| self.parse_rec(c, peekable, complement_token));
84108
self.builder.finish_node();
85109
}
86110
}
87111

88112
fn parse(
89113
mut self,
90114
nodes: &Vec<&Node>,
91-
extras: Vec<(SyntaxKind, usize, usize, &str)>,
115+
extras: Vec<Extra>,
116+
complement_token: &HashSet<usize>,
92117
) -> (GreenNode, impl Resolver) {
93118
let mut peekable = extras.into_iter().peekable();
94119

95120
self.builder.start_node(SyntaxKind::Root);
96121

97122
for node in nodes {
98-
self.parse_rec(node, &mut peekable);
123+
self.parse_rec(node, &mut peekable, complement_token);
99124
}
100125

101-
while let Some((kind, _, _, text)) = peekable.peek() {
102-
self.builder.token(*kind, text);
126+
while let Some(Extra { kind, comment, .. }) = peekable.peek() {
127+
self.builder.token(*kind, comment);
103128
peekable.next();
104129
}
105130

@@ -184,6 +209,14 @@ fn init_tokens(tokens: &mut [Token]) {
184209

185210
/// Parses a string as PostgreSQL syntax and converts it into a `ResolvedNode`.
186211
pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
212+
parse_with_transformer(input, &[])
213+
}
214+
215+
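/// Parses a string as PostgreSQL syntax and converts it into a `ResolvedNode`, consulting `transformers` inside the parse loop: the first transformer that returns a `ParseTransform` can insert a dummy token or have the current token skipped and recorded as an extra.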
216+
pub fn parse_with_transformer(
217+
input: &str,
218+
transformers: &[&dyn ParseTransformer],
219+
) -> Result<ResolvedNode, ParseError> {
187220
let mut tokens = lex(input);
188221

189222
if !tokens.is_empty() {
@@ -217,12 +250,13 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
217250
));
218251

219252
let mut last_pos = 0;
220-
let mut extras: Vec<(SyntaxKind, usize, usize, &str)> = Vec::new();
253+
let mut extras: Vec<Extra> = Vec::new();
254+
let mut complement_token = HashSet::new();
221255

222256
loop {
223257
let state = stack.last().unwrap().0;
224-
let token = match tokens.peek() {
225-
Some(token) => token,
258+
let mut token = match tokens.peek() {
259+
Some(token) => token.clone(),
226260
None => {
227261
return Err(ParseError {
228262
message: "unexpected end of input".to_string(),
@@ -232,39 +266,105 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
232266
}
233267
};
234268

235-
let cid = token_kind_to_component_id(&token.kind);
269+
let mut cid = token_kind_to_component_id(&token.kind);
236270

237271
if matches!(token.kind, TokenKind::C_COMMENT | TokenKind::SQL_COMMENT) {
238272
if last_pos < token.start_byte_pos {
239-
extras.push((
240-
SyntaxKind::Whitespace,
241-
last_pos,
242-
token.start_byte_pos,
243-
&input[last_pos..token.start_byte_pos],
244-
));
273+
extras.push(Extra {
274+
kind: SyntaxKind::Whitespace,
275+
start_byte_pos: last_pos,
276+
end_byte_pos: token.start_byte_pos,
277+
comment: &input[last_pos..token.start_byte_pos],
278+
});
245279
}
246280

247281
last_pos = token.end_byte_pos;
248282

249283
let kind = SyntaxKind::from_raw(RawSyntaxKind(cid));
250-
extras.push((
284+
extras.push(Extra {
251285
kind,
252-
token.start_byte_pos,
253-
token.end_byte_pos,
254-
&input[token.start_byte_pos..token.end_byte_pos],
255-
));
286+
start_byte_pos: token.start_byte_pos,
287+
end_byte_pos: token.end_byte_pos,
288+
comment: &input[token.start_byte_pos..token.end_byte_pos],
289+
});
256290
tokens.next();
257291

258292
continue;
259293
}
260294

261-
let action = match action_table[(state * num_terminal_symbol() + cid) as usize] {
295+
let mut insert_dummy_token = false;
296+
let mut action = match action_table[(state * num_terminal_symbol() + cid) as usize] {
262297
0x7FFF => Action::Error,
263298
v if v > 0 => Action::Shift((v - 1) as usize),
264299
v if v < 0 => Action::Reduce((-v - 1) as usize),
265300
_ => Action::Accept,
266301
};
267302

303+
// transform
304+
{
305+
let lr_parse_state = LRParseState {
306+
state,
307+
stack: &stack,
308+
action_table,
309+
goto_table,
310+
extras: &extras,
311+
token: &token,
312+
};
313+
314+
if let Some(parse_transform) = transformers
315+
.iter()
316+
.find_map(|t| t.transform(&lr_parse_state))
317+
{
318+
match parse_transform {
319+
ParseTransform::InsertToken(token_kind) => {
320+
let last_extra = extras.last().unwrap();
321+
322+
cid = token_kind_to_component_id(&token_kind);
323+
token = Token {
324+
start_byte_pos: last_extra.end_byte_pos,
325+
end_byte_pos: last_extra.end_byte_pos,
326+
kind: token_kind,
327+
value: String::new(),
328+
};
329+
complement_token.insert(token.start_byte_pos);
330+
331+
action = match action_table[(state * num_terminal_symbol() + cid) as usize]
332+
{
333+
0x7FFF => Action::Error,
334+
v if v > 0 => Action::Shift((v - 1) as usize),
335+
v if v < 0 => Action::Reduce((-v - 1) as usize),
336+
_ => Action::Accept,
337+
};
338+
insert_dummy_token = true;
339+
}
340+
341+
ParseTransform::SkipToken => {
342+
// Skipped tokens are treated as extras
343+
if last_pos < token.start_byte_pos {
344+
extras.push(Extra {
345+
kind: SyntaxKind::Whitespace,
346+
start_byte_pos: last_pos,
347+
end_byte_pos: token.start_byte_pos,
348+
comment: &input[last_pos..token.start_byte_pos],
349+
});
350+
}
351+
352+
last_pos = token.end_byte_pos;
353+
354+
let kind = SyntaxKind::from_raw(RawSyntaxKind(cid));
355+
extras.push(Extra {
356+
kind,
357+
start_byte_pos: token.start_byte_pos,
358+
end_byte_pos: token.end_byte_pos,
359+
comment: &input[token.start_byte_pos..token.end_byte_pos],
360+
});
361+
tokens.next();
362+
continue;
363+
}
364+
}
365+
}
366+
}
367+
268368
match action {
269369
Action::Shift(next_state) => {
270370
let node = Node {
@@ -276,18 +376,20 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
276376
};
277377

278378
if last_pos < token.start_byte_pos {
279-
extras.push((
280-
SyntaxKind::Whitespace,
281-
last_pos,
282-
token.start_byte_pos,
283-
&input[last_pos..token.start_byte_pos],
284-
));
379+
extras.push(Extra {
380+
kind: SyntaxKind::Whitespace,
381+
start_byte_pos: last_pos,
382+
end_byte_pos: token.start_byte_pos,
383+
comment: &input[last_pos..token.start_byte_pos],
384+
});
285385
}
286386

287387
last_pos = token.end_byte_pos;
288388

289389
stack.push((next_state as u32, node));
290-
tokens.next();
390+
if !insert_dummy_token {
391+
tokens.next();
392+
}
291393
}
292394
Action::Reduce(rule_index) => {
293395
let rule = &RULES[rule_index];
@@ -308,7 +410,7 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
308410
// Adopt the larger of the end position of the previous token or the end of the space.
309411
extras
310412
.last()
311-
.map(|e| e.2)
413+
.map(|e| e.end_byte_pos)
312414
.unwrap_or_default()
313415
.max(stack.last().unwrap().1.end_byte_pos)
314416
});
@@ -364,12 +466,12 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
364466

365467
while let Some(token) = tokens.next() {
366468
if last_pos < token.start_byte_pos {
367-
extras.push((
368-
SyntaxKind::Whitespace,
369-
last_pos,
370-
token.start_byte_pos,
371-
&input[last_pos..token.start_byte_pos],
372-
));
469+
extras.push(Extra {
470+
kind: SyntaxKind::Whitespace,
471+
start_byte_pos: last_pos,
472+
end_byte_pos: token.start_byte_pos,
473+
comment: &input[last_pos..token.start_byte_pos],
474+
});
373475
}
374476

375477
last_pos = token.end_byte_pos;
@@ -381,19 +483,19 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
381483

382484
let cid = token_kind_to_component_id(&token.kind);
383485
let kind = SyntaxKind::from_raw(RawSyntaxKind(cid));
384-
extras.push((
486+
extras.push(Extra {
385487
kind,
386-
token.start_byte_pos,
387-
token.end_byte_pos,
388-
&input[token.start_byte_pos..token.end_byte_pos],
389-
));
488+
start_byte_pos: token.start_byte_pos,
489+
end_byte_pos: token.end_byte_pos,
490+
comment: &input[token.start_byte_pos..token.end_byte_pos],
491+
});
390492
}
391493

392494
let parser = Parser {
393495
builder: GreenNodeBuilder::new(),
394496
};
395497
let root: Vec<&Node> = stack[1..].iter().map(|s| &s.1).collect();
396-
let (ast, resolver) = parser.parse(&root, extras);
498+
let (ast, resolver) = parser.parse(&root, extras, &complement_token);
397499

398500
Ok(SyntaxNode::new_root_with_resolver(ast, resolver))
399501
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
use crate::syntax_kind::SyntaxKind;
2+
3+
#[derive(Debug)]
4+
pub(crate) struct Extra<'a> {
5+
pub(crate) kind: SyntaxKind,
6+
pub(crate) start_byte_pos: usize,
7+
pub(crate) end_byte_pos: usize,
8+
pub(crate) comment: &'a str,
9+
}

0 commit comments

Comments
 (0)