@@ -1,3 +1,11 @@
+pub(crate) mod extra;
+pub(crate) mod lr_parse_state;
+
+pub(crate) use extra::*;
+pub(crate) use lr_parse_state::*;
+
+use std::collections::HashSet;
+
 use cstree::{
     build::GreenNodeBuilder, green::GreenNode, interning::Resolver, RawSyntaxKind, Syntax,
 };
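
The new `extra` module itself is not part of this diff. From the construction and destructuring sites below, a plausible shape for `Extra` is roughly the following — a sketch inferred from usage, not the actual definition:

```rust
// Sketch only: inferred from how `Extra` is built and matched below,
// not from extra.rs. `comment` borrows the slice of the input covered
// by the extra token (whitespace or a comment).
pub(crate) struct Extra<'a> {
    pub kind: SyntaxKind,
    pub start_byte_pos: usize,
    pub end_byte_pos: usize,
    pub comment: &'a str,
}
```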
@@ -10,18 +18,25 @@ use crate::{
         rule_name_to_component_id, token_kind_to_component_id, Action, ACTION_TABLE, GOTO_TABLE,
         RULES,
     },
+    transform::{ParseTransform, ParseTransformer},
 };
 
 use super::{lexer::Token, syntax_kind::SyntaxKind};
 
-struct Node {
+pub(crate) struct Node {
     token: Option<Token>,
-    component_id: u32,
+    pub component_id: u32,
     children: Vec<Node>,
     start_byte_pos: usize,
     end_byte_pos: usize,
 }
 
+impl From<&Node> for SyntaxKind {
+    fn from(value: &Node) -> Self {
+        SyntaxKind::from_raw(RawSyntaxKind(value.component_id))
+    }
+}
+
 pub type PostgreSQLSyntax = SyntaxKind;
 
 impl From<SyntaxKind> for cstree::RawSyntaxKind {
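
The `From<&Node>` impl added above lets call sites recover a node's `SyntaxKind` without spelling out the raw-kind conversion; illustratively (this helper is hypothetical, not part of the change):

```rust
// Illustrative only: equivalent ways to get a node's kind via the new impl.
fn node_kind(node: &Node) -> SyntaxKind {
    SyntaxKind::from(node) // or: node.into()
}
```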
@@ -55,21 +70,30 @@ impl Parser {
     fn parse_rec(
         &mut self,
         node: &Node,
-        peekable: &mut std::iter::Peekable<std::vec::IntoIter<(SyntaxKind, usize, usize, &str)>>,
+        peekable: &mut std::iter::Peekable<std::vec::IntoIter<Extra>>,
+        complement_token: &HashSet<usize>,
     ) {
         if cfg!(feature = "remove-empty-node") {
-            if node.start_byte_pos == node.end_byte_pos {
+            if node.start_byte_pos == node.end_byte_pos
+                && !complement_token.contains(&node.start_byte_pos)
+            {
                 return;
             }
         }
 
-        while let Some((kind, start, _, text)) = peekable.peek() {
+        while let Some(Extra {
+            kind,
+            start_byte_pos,
+            comment,
+            ..
+        }) = peekable.peek()
+        {
             // TODO: Consider whether the presence or absence of an equals sign changes the position of comments. Determine which option is preferable
-            if *start >= node.start_byte_pos {
+            if *start_byte_pos >= node.start_byte_pos {
                 // if *start > node.start_byte_pos {
                 break;
             }
-            self.builder.token(*kind, text);
+            self.builder.token(*kind, &comment);
             peekable.next();
         }
 
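
Under the `remove-empty-node` feature, zero-width nodes are pruned from the tree. Tokens spliced in by the transform hook later in this change are zero-width by construction, so their start positions are recorded in `complement_token` to exempt them. The new predicate, restated as a standalone sketch for readability:

```rust
// Sketch restating the pruning rule above: prune only zero-width nodes
// that were NOT created by token insertion.
fn should_prune(node: &Node, complement_token: &HashSet<usize>) -> bool {
    node.start_byte_pos == node.end_byte_pos
        && !complement_token.contains(&node.start_byte_pos)
}
```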
@@ -80,26 +104,27 @@ impl Parser {
             self.builder.start_node(kind);
             node.children
                 .iter()
-                .for_each(|c| self.parse_rec(c, peekable));
+                .for_each(|c| self.parse_rec(c, peekable, complement_token));
             self.builder.finish_node();
         }
     }
 
     fn parse(
         mut self,
         nodes: &Vec<&Node>,
-        extras: Vec<(SyntaxKind, usize, usize, &str)>,
+        extras: Vec<Extra>,
+        complement_token: &HashSet<usize>,
     ) -> (GreenNode, impl Resolver) {
         let mut peekable = extras.into_iter().peekable();
 
         self.builder.start_node(SyntaxKind::Root);
 
         for node in nodes {
-            self.parse_rec(node, &mut peekable);
+            self.parse_rec(node, &mut peekable, complement_token);
         }
 
-        while let Some((kind, _, _, text)) = peekable.peek() {
-            self.builder.token(*kind, text);
+        while let Some(Extra { kind, comment, .. }) = peekable.peek() {
+            self.builder.token(*kind, comment);
             peekable.next();
         }
 
@@ -184,6 +209,14 @@ fn init_tokens(tokens: &mut [Token]) {
 
 /// Parsing a string as PostgreSQL syntax and converting it into a ResolvedNode
 pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
+    parse_with_transformer(input, &[])
+}
+
+/// Like [`parse`], but applies the given [`ParseTransformer`]s at each step of the LR parse
+pub fn parse_with_transformer(
+    input: &str,
+    transformers: &[&dyn ParseTransformer],
+) -> Result<ResolvedNode, ParseError> {
     let mut tokens = lex(input);
 
     if !tokens.is_empty() {
@@ -217,12 +250,13 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
     ));
 
     let mut last_pos = 0;
-    let mut extras: Vec<(SyntaxKind, usize, usize, &str)> = Vec::new();
+    let mut extras: Vec<Extra> = Vec::new();
+    let mut complement_token = HashSet::new();
 
     loop {
         let state = stack.last().unwrap().0;
-        let token = match tokens.peek() {
-            Some(token) => token,
+        let mut token = match tokens.peek() {
+            Some(token) => token.clone(),
             None => {
                 return Err(ParseError {
                     message: "unexpected end of input".to_string(),
@@ -232,39 +266,105 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
             }
         };
 
-        let cid = token_kind_to_component_id(&token.kind);
+        let mut cid = token_kind_to_component_id(&token.kind);
 
         if matches!(token.kind, TokenKind::C_COMMENT | TokenKind::SQL_COMMENT) {
             if last_pos < token.start_byte_pos {
-                extras.push((
-                    SyntaxKind::Whitespace,
-                    last_pos,
-                    token.start_byte_pos,
-                    &input[last_pos..token.start_byte_pos],
-                ));
+                extras.push(Extra {
+                    kind: SyntaxKind::Whitespace,
+                    start_byte_pos: last_pos,
+                    end_byte_pos: token.start_byte_pos,
+                    comment: &input[last_pos..token.start_byte_pos],
+                });
             }
 
             last_pos = token.end_byte_pos;
 
             let kind = SyntaxKind::from_raw(RawSyntaxKind(cid));
-            extras.push((
+            extras.push(Extra {
                 kind,
-                token.start_byte_pos,
-                token.end_byte_pos,
-                &input[token.start_byte_pos..token.end_byte_pos],
-            ));
+                start_byte_pos: token.start_byte_pos,
+                end_byte_pos: token.end_byte_pos,
+                comment: &input[token.start_byte_pos..token.end_byte_pos],
+            });
             tokens.next();
 
             continue;
         }
 
-        let action = match action_table[(state * num_terminal_symbol() + cid) as usize] {
+        let mut insert_dummy_token = false;
+        let mut action = match action_table[(state * num_terminal_symbol() + cid) as usize] {
             0x7FFF => Action::Error,
             v if v > 0 => Action::Shift((v - 1) as usize),
             v if v < 0 => Action::Reduce((-v - 1) as usize),
             _ => Action::Accept,
        };
 
+        // transform: let registered transformers rewrite this step before the action is applied
+        {
+            let lr_parse_state = LRParseState {
+                state,
+                stack: &stack,
+                action_table,
+                goto_table,
+                extras: &extras,
+                token: &token,
+            };
+
+            if let Some(parse_transform) = transformers
+                .iter()
+                .find_map(|t| t.transform(&lr_parse_state))
+            {
+                match parse_transform {
+                    ParseTransform::InsertToken(token_kind) => {
+                        let last_extra = extras.last().unwrap();
+
+                        cid = token_kind_to_component_id(&token_kind);
+                        token = Token {
+                            start_byte_pos: last_extra.end_byte_pos,
+                            end_byte_pos: last_extra.end_byte_pos,
+                            kind: token_kind,
+                            value: String::new(),
+                        };
+                        complement_token.insert(token.start_byte_pos);
+
+                        action = match action_table[(state * num_terminal_symbol() + cid) as usize]
+                        {
+                            0x7FFF => Action::Error,
+                            v if v > 0 => Action::Shift((v - 1) as usize),
+                            v if v < 0 => Action::Reduce((-v - 1) as usize),
+                            _ => Action::Accept,
+                        };
+                        insert_dummy_token = true;
+                    }
+
+                    ParseTransform::SkipToken => {
+                        // Skipped tokens are kept as extras (trivia)
+                        if last_pos < token.start_byte_pos {
+                            extras.push(Extra {
+                                kind: SyntaxKind::Whitespace,
+                                start_byte_pos: last_pos,
+                                end_byte_pos: token.start_byte_pos,
+                                comment: &input[last_pos..token.start_byte_pos],
+                            });
+                        }
+
+                        last_pos = token.end_byte_pos;
+
+                        let kind = SyntaxKind::from_raw(RawSyntaxKind(cid));
+                        extras.push(Extra {
+                            kind,
+                            start_byte_pos: token.start_byte_pos,
+                            end_byte_pos: token.end_byte_pos,
+                            comment: &input[token.start_byte_pos..token.end_byte_pos],
+                        });
+                        tokens.next();
+                        continue;
+                    }
+                }
+            }
+        }
+
         match action {
             Action::Shift(next_state) => {
                 let node = Node {
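
The `transform` and `lr_parse_state` modules are not shown in this diff. Inferred from the call sites in the hunk above, their types presumably look roughly like this — a sketch; field and table element types are assumptions:

```rust
// Sketch only: shapes inferred from usage above, not from the actual modules.
pub(crate) struct LRParseState<'a> {
    pub state: u32,
    pub stack: &'a Vec<(u32, Node)>,
    pub action_table: &'a [i16], // element type is a guess
    pub goto_table: &'a [i16],   // element type is a guess
    pub extras: &'a Vec<Extra<'a>>,
    pub token: &'a Token,
}

pub enum ParseTransform {
    /// Splice a zero-width token of this kind in front of the current token
    InsertToken(TokenKind),
    /// Demote the current token to trivia and keep parsing
    SkipToken,
}

pub trait ParseTransformer {
    fn transform(&self, state: &LRParseState) -> Option<ParseTransform>;
}
```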
@@ -276,18 +376,20 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
                 };
 
                 if last_pos < token.start_byte_pos {
-                    extras.push((
-                        SyntaxKind::Whitespace,
-                        last_pos,
-                        token.start_byte_pos,
-                        &input[last_pos..token.start_byte_pos],
-                    ));
+                    extras.push(Extra {
+                        kind: SyntaxKind::Whitespace,
+                        start_byte_pos: last_pos,
+                        end_byte_pos: token.start_byte_pos,
+                        comment: &input[last_pos..token.start_byte_pos],
+                    });
                 }
 
                 last_pos = token.end_byte_pos;
 
                 stack.push((next_state as u32, node));
-                tokens.next();
+                if !insert_dummy_token {
+                    tokens.next();
+                }
             }
             Action::Reduce(rule_index) => {
                 let rule = &RULES[rule_index];
@@ -308,7 +410,7 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
                     // Adopt the larger of the end position of the previous token or the end of the space.
                     extras
                         .last()
-                        .map(|e| e.2)
+                        .map(|e| e.end_byte_pos)
                         .unwrap_or_default()
                         .max(stack.last().unwrap().1.end_byte_pos)
                 });
@@ -364,12 +466,12 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
 
     while let Some(token) = tokens.next() {
         if last_pos < token.start_byte_pos {
-            extras.push((
-                SyntaxKind::Whitespace,
-                last_pos,
-                token.start_byte_pos,
-                &input[last_pos..token.start_byte_pos],
-            ));
+            extras.push(Extra {
+                kind: SyntaxKind::Whitespace,
+                start_byte_pos: last_pos,
+                end_byte_pos: token.start_byte_pos,
+                comment: &input[last_pos..token.start_byte_pos],
+            });
         }
 
         last_pos = token.end_byte_pos;
last_pos = token. end_byte_pos ;
@@ -381,19 +483,19 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
381
483
382
484
let cid = token_kind_to_component_id ( & token. kind ) ;
383
485
let kind = SyntaxKind :: from_raw ( RawSyntaxKind ( cid) ) ;
384
- extras. push ( (
486
+ extras. push ( Extra {
385
487
kind,
386
- token. start_byte_pos ,
387
- token. end_byte_pos ,
388
- & input[ token. start_byte_pos ..token. end_byte_pos ] ,
389
- ) ) ;
488
+ start_byte_pos : token. start_byte_pos ,
489
+ end_byte_pos : token. end_byte_pos ,
490
+ comment : & input[ token. start_byte_pos ..token. end_byte_pos ] ,
491
+ } ) ;
390
492
}
391
493
392
494
let parser = Parser {
393
495
builder : GreenNodeBuilder :: new ( ) ,
394
496
} ;
395
497
let root: Vec < & Node > = stack[ 1 ..] . iter ( ) . map ( |s| & s. 1 ) . collect ( ) ;
396
- let ( ast, resolver) = parser. parse ( & root, extras) ;
498
+ let ( ast, resolver) = parser. parse ( & root, extras, & complement_token ) ;
397
499
398
500
Ok ( SyntaxNode :: new_root_with_resolver ( ast, resolver) )
399
501
}
0 commit comments