1
- // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
1
+ // Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
2
2
// file at the top-level directory of this distribution and at
3
3
// http://rust-lang.org/COPYRIGHT.
4
4
//
8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
10
11
- //! This is an Earley-like parser, without support for in-grammar nonterminals,
12
- //! only by calling out to the main rust parser for named nonterminals (which it
13
- //! commits to fully when it hits one in a grammar). This means that there are no
14
- //! completer or predictor rules, and therefore no need to store one column per
15
- //! token: instead, there's a set of current Earley items and a set of next
16
- //! ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
17
- //! pathological cases, is worse than traditional Earley parsing, but it's an
18
- //! easier fit for Macro-by-Example-style rules, and I think the overhead is
19
- //! lower. (In order to prevent the pathological case, we'd need to lazily
20
- //! construct the resulting `NamedMatch`es at the very end. It'd be a pain,
21
- //! and require more memory to keep around old items, but it would also save
22
- //! overhead)
11
+ //! This is an NFA-based parser, which calls out to the main rust parser for named nonterminals
12
+ //! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
13
+ //! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
14
+ //! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
15
+ //! fit for Macro-by-Example-style rules.
16
+ //!
17
+ //! (In order to prevent the pathological case, we'd need to lazily construct the resulting
18
+ //! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
19
+ //! items, but it would also save overhead)
20
+ //!
21
+ //! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate.
22
+ //! The macro parser restricts itself to the features of finite state automata. Earley parsers
23
+ //! can be described as an extension of NFAs with completion rules, prediction rules, and recursion.
23
24
//!
24
25
//! Quick intro to how the parser works:
25
26
//!
26
27
//! A 'position' is a spot in the middle of a matcher, usually represented as a
27
28
//! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`.
28
29
//!
29
30
//! The parser walks through the input a character at a time, maintaining a list
30
- //! of items consistent with the current position in the input string: `cur_eis `.
31
+ //! of threads consistent with the current position in the input string: `cur_items `.
31
32
//!
32
- //! As it processes them, it fills up `eof_eis ` with items that would be valid if
33
- //! the macro invocation is now over, `bb_eis ` with items that are waiting on
34
- //! a Rust nonterminal like `$e:expr`, and `next_eis ` with items that are waiting
33
+ //! As it processes them, it fills up `eof_items ` with threads that would be valid if
34
+ //! the macro invocation is now over, `bb_items ` with threads that are waiting on
35
+ //! a Rust nonterminal like `$e:expr`, and `next_items ` with threads that are waiting
35
36
//! on a particular token. Most of the logic concerns moving the · through the
36
- //! repetitions indicated by Kleene stars. It only advances or calls out to the
37
- //! real Rust parser when no `cur_eis` items remain
37
+ //! repetitions indicated by Kleene stars. The rules for moving the · without
38
+ //! consuming any input are called epsilon transitions. It only advances or calls
39
+ //! out to the real Rust parser when no `cur_items` threads remain.
38
40
//!
39
41
//! Example:
40
42
//!
41
43
//! ```text, ignore
42
44
//! Start parsing a a a a b against [· a $( a )* a b].
43
45
//!
44
46
//! Remaining input: a a a a b
45
- //! next_eis : [· a $( a )* a b]
47
+ //! next : [· a $( a )* a b]
46
48
//!
47
49
//! - - - Advance over an a. - - -
48
50
//!
54
56
//! - - - Advance over an a. - - -
55
57
//!
56
58
//! Remaining input: a a b
57
- //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
58
- //! Finish/Repeat (first item)
59
+ //! cur: [a $( a · )* a b] [a $( a )* a · b]
60
+ //! Follow epsilon transition: Finish/Repeat (first item)
59
61
//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
60
62
//!
61
63
//! - - - Advance over an a. - - - (this looks exactly like the last step)
62
64
//!
63
65
//! Remaining input: a b
64
- //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
65
- //! Finish/Repeat (first item)
66
+ //! cur: [a $( a · )* a b] [a $( a )* a · b]
67
+ //! Follow epsilon transition: Finish/Repeat (first item)
66
68
//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
67
69
//!
68
70
//! - - - Advance over an a. - - - (this looks exactly like the last step)
69
71
//!
70
72
//! Remaining input: b
71
- //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
72
- //! Finish/Repeat (first item)
73
- //! next: [a $( a )* · a b] [a $( · a )* a b]
73
+ //! cur: [a $( a · )* a b] [a $( a )* a · b]
74
+ //! Follow epsilon transition: Finish/Repeat (first item)
75
+ //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
74
76
//!
75
77
//! - - - Advance over a b. - - -
76
78
//!
@@ -289,94 +291,94 @@ fn create_matches(len: usize) -> Vec<Rc<Vec<NamedMatch>>> {
289
291
}
290
292
291
293
fn inner_parse_loop ( sess : & ParseSess ,
292
- cur_eis : & mut SmallVector < Box < MatcherPos > > ,
293
- next_eis : & mut Vec < Box < MatcherPos > > ,
294
- eof_eis : & mut SmallVector < Box < MatcherPos > > ,
295
- bb_eis : & mut SmallVector < Box < MatcherPos > > ,
294
+ cur_items : & mut SmallVector < Box < MatcherPos > > ,
295
+ next_items : & mut Vec < Box < MatcherPos > > ,
296
+ eof_items : & mut SmallVector < Box < MatcherPos > > ,
297
+ bb_items : & mut SmallVector < Box < MatcherPos > > ,
296
298
token : & Token ,
297
299
span : syntax_pos:: Span )
298
300
-> ParseResult < ( ) > {
299
- while let Some ( mut ei ) = cur_eis . pop ( ) {
301
+ while let Some ( mut item ) = cur_items . pop ( ) {
300
302
// When unzipped trees end, remove them
301
- while ei . idx >= ei . top_elts . len ( ) {
302
- match ei . stack . pop ( ) {
303
+ while item . idx >= item . top_elts . len ( ) {
304
+ match item . stack . pop ( ) {
303
305
Some ( MatcherTtFrame { elts, idx } ) => {
304
- ei . top_elts = elts;
305
- ei . idx = idx + 1 ;
306
+ item . top_elts = elts;
307
+ item . idx = idx + 1 ;
306
308
}
307
309
None => break
308
310
}
309
311
}
310
312
311
- let idx = ei . idx ;
312
- let len = ei . top_elts . len ( ) ;
313
+ let idx = item . idx ;
314
+ let len = item . top_elts . len ( ) ;
313
315
314
316
// at end of sequence
315
317
if idx >= len {
316
318
// We are repeating iff there is a parent
317
- if ei . up . is_some ( ) {
319
+ if item . up . is_some ( ) {
318
320
// Disregarding the separator, add the "up" case to the tokens that should be
319
321
// examined.
320
322
// (remove this condition to make trailing seps ok)
321
323
if idx == len {
322
- let mut new_pos = ei . up . clone ( ) . unwrap ( ) ;
324
+ let mut new_pos = item . up . clone ( ) . unwrap ( ) ;
323
325
324
326
// update matches (the MBE "parse tree") by appending
325
327
// each tree as a subtree.
326
328
327
329
// Only touch the binders we have actually bound
328
- for idx in ei . match_lo ..ei . match_hi {
329
- let sub = ei . matches [ idx] . clone ( ) ;
330
- new_pos. push_match ( idx, MatchedSeq ( sub, Span { lo : ei . sp_lo , ..span } ) ) ;
330
+ for idx in item . match_lo ..item . match_hi {
331
+ let sub = item . matches [ idx] . clone ( ) ;
332
+ new_pos. push_match ( idx, MatchedSeq ( sub, Span { lo : item . sp_lo , ..span } ) ) ;
331
333
}
332
334
333
- new_pos. match_cur = ei . match_hi ;
335
+ new_pos. match_cur = item . match_hi ;
334
336
new_pos. idx += 1 ;
335
- cur_eis . push ( new_pos) ;
337
+ cur_items . push ( new_pos) ;
336
338
}
337
339
338
340
// Check if we need a separator
339
- if idx == len && ei . sep . is_some ( ) {
341
+ if idx == len && item . sep . is_some ( ) {
340
342
// We have a separator, and it is the current token.
341
- if ei . sep . as_ref ( ) . map ( |sep| token_name_eq ( token, sep) ) . unwrap_or ( false ) {
342
- ei . idx += 1 ;
343
- next_eis . push ( ei ) ;
343
+ if item . sep . as_ref ( ) . map ( |sep| token_name_eq ( token, sep) ) . unwrap_or ( false ) {
344
+ item . idx += 1 ;
345
+ next_items . push ( item ) ;
344
346
}
345
347
} else { // we don't need a separator
346
- ei . match_cur = ei . match_lo ;
347
- ei . idx = 0 ;
348
- cur_eis . push ( ei ) ;
348
+ item . match_cur = item . match_lo ;
349
+ item . idx = 0 ;
350
+ cur_items . push ( item ) ;
349
351
}
350
352
} else {
351
353
// We aren't repeating, so we must be potentially at the end of the input.
352
- eof_eis . push ( ei ) ;
354
+ eof_items . push ( item ) ;
353
355
}
354
356
} else {
355
- match ei . top_elts . get_tt ( idx) {
357
+ match item . top_elts . get_tt ( idx) {
356
358
/* need to descend into sequence */
357
359
TokenTree :: Sequence ( sp, seq) => {
358
360
if seq. op == quoted:: KleeneOp :: ZeroOrMore {
359
361
// Examine the case where there are 0 matches of this sequence
360
- let mut new_ei = ei . clone ( ) ;
361
- new_ei . match_cur += seq. num_captures ;
362
- new_ei . idx += 1 ;
363
- for idx in ei . match_cur ..ei . match_cur + seq. num_captures {
364
- new_ei . push_match ( idx, MatchedSeq ( Rc :: new ( vec ! [ ] ) , sp) ) ;
362
+ let mut new_item = item . clone ( ) ;
363
+ new_item . match_cur += seq. num_captures ;
364
+ new_item . idx += 1 ;
365
+ for idx in item . match_cur ..item . match_cur + seq. num_captures {
366
+ new_item . push_match ( idx, MatchedSeq ( Rc :: new ( vec ! [ ] ) , sp) ) ;
365
367
}
366
- cur_eis . push ( new_ei ) ;
368
+ cur_items . push ( new_item ) ;
367
369
}
368
370
369
371
// Examine the case where there is at least one match of this sequence
370
- let matches = create_matches ( ei . matches . len ( ) ) ;
371
- cur_eis . push ( Box :: new ( MatcherPos {
372
+ let matches = create_matches ( item . matches . len ( ) ) ;
373
+ cur_items . push ( Box :: new ( MatcherPos {
372
374
stack : vec ! [ ] ,
373
375
sep : seq. separator . clone ( ) ,
374
376
idx : 0 ,
375
377
matches : matches,
376
- match_lo : ei . match_cur ,
377
- match_cur : ei . match_cur ,
378
- match_hi : ei . match_cur + seq. num_captures ,
379
- up : Some ( ei ) ,
378
+ match_lo : item . match_cur ,
379
+ match_cur : item . match_cur ,
380
+ match_hi : item . match_cur + seq. num_captures ,
381
+ up : Some ( item ) ,
380
382
sp_lo : sp. lo ,
381
383
top_elts : Tt ( TokenTree :: Sequence ( sp, seq) ) ,
382
384
} ) ) ;
@@ -390,22 +392,22 @@ fn inner_parse_loop(sess: &ParseSess,
390
392
// Built-in nonterminals never start with these tokens,
391
393
// so we can eliminate them from consideration.
392
394
if may_begin_with ( & * id. name . as_str ( ) , token) {
393
- bb_eis . push ( ei ) ;
395
+ bb_items . push ( item ) ;
394
396
}
395
397
}
396
398
seq @ TokenTree :: Delimited ( ..) | seq @ TokenTree :: Token ( _, DocComment ( ..) ) => {
397
- let lower_elts = mem:: replace ( & mut ei . top_elts , Tt ( seq) ) ;
398
- let idx = ei . idx ;
399
- ei . stack . push ( MatcherTtFrame {
399
+ let lower_elts = mem:: replace ( & mut item . top_elts , Tt ( seq) ) ;
400
+ let idx = item . idx ;
401
+ item . stack . push ( MatcherTtFrame {
400
402
elts : lower_elts,
401
403
idx : idx,
402
404
} ) ;
403
- ei . idx = 0 ;
404
- cur_eis . push ( ei ) ;
405
+ item . idx = 0 ;
406
+ cur_items . push ( item ) ;
405
407
}
406
408
TokenTree :: Token ( _, ref t) if token_name_eq ( t, token) => {
407
- ei . idx += 1 ;
408
- next_eis . push ( ei ) ;
409
+ item . idx += 1 ;
410
+ next_items . push ( item ) ;
409
411
}
410
412
TokenTree :: Token ( ..) | TokenTree :: MetaVar ( ..) => { }
411
413
}
@@ -422,38 +424,38 @@ pub fn parse(sess: &ParseSess,
422
424
recurse_into_modules : bool )
423
425
-> NamedParseResult {
424
426
let mut parser = Parser :: new ( sess, tts, directory, recurse_into_modules, true ) ;
425
- let mut cur_eis = SmallVector :: one ( initial_matcher_pos ( ms. to_owned ( ) , parser. span . lo ) ) ;
426
- let mut next_eis = Vec :: new ( ) ; // or proceed normally
427
+ let mut cur_items = SmallVector :: one ( initial_matcher_pos ( ms. to_owned ( ) , parser. span . lo ) ) ;
428
+ let mut next_items = Vec :: new ( ) ; // or proceed normally
427
429
428
430
loop {
429
- let mut bb_eis = SmallVector :: new ( ) ; // black-box parsed by parser.rs
430
- let mut eof_eis = SmallVector :: new ( ) ;
431
- assert ! ( next_eis . is_empty( ) ) ;
431
+ let mut bb_items = SmallVector :: new ( ) ; // black-box parsed by parser.rs
432
+ let mut eof_items = SmallVector :: new ( ) ;
433
+ assert ! ( next_items . is_empty( ) ) ;
432
434
433
- match inner_parse_loop ( sess, & mut cur_eis , & mut next_eis , & mut eof_eis , & mut bb_eis ,
435
+ match inner_parse_loop ( sess, & mut cur_items , & mut next_items , & mut eof_items , & mut bb_items ,
434
436
& parser. token , parser. span ) {
435
437
Success ( _) => { } ,
436
438
Failure ( sp, tok) => return Failure ( sp, tok) ,
437
439
Error ( sp, msg) => return Error ( sp, msg) ,
438
440
}
439
441
440
- // inner parse loop handled all cur_eis , so it's empty
441
- assert ! ( cur_eis . is_empty( ) ) ;
442
+ // inner parse loop handled all cur_items , so it's empty
443
+ assert ! ( cur_items . is_empty( ) ) ;
442
444
443
445
/* error messages here could be improved with links to orig. rules */
444
446
if token_name_eq ( & parser. token , & token:: Eof ) {
445
- if eof_eis . len ( ) == 1 {
446
- let matches = eof_eis [ 0 ] . matches . iter_mut ( ) . map ( |mut dv| {
447
+ if eof_items . len ( ) == 1 {
448
+ let matches = eof_items [ 0 ] . matches . iter_mut ( ) . map ( |mut dv| {
447
449
Rc :: make_mut ( dv) . pop ( ) . unwrap ( )
448
450
} ) ;
449
451
return nameize ( sess, ms, matches) ;
450
- } else if eof_eis . len ( ) > 1 {
452
+ } else if eof_items . len ( ) > 1 {
451
453
return Error ( parser. span , "ambiguity: multiple successful parses" . to_string ( ) ) ;
452
454
} else {
453
455
return Failure ( parser. span , token:: Eof ) ;
454
456
}
455
- } else if ( !bb_eis . is_empty ( ) && !next_eis . is_empty ( ) ) || bb_eis . len ( ) > 1 {
456
- let nts = bb_eis . iter ( ) . map ( |ei | match ei . top_elts . get_tt ( ei . idx ) {
457
+ } else if ( !bb_items . is_empty ( ) && !next_items . is_empty ( ) ) || bb_items . len ( ) > 1 {
458
+ let nts = bb_items . iter ( ) . map ( |item | match item . top_elts . get_tt ( item . idx ) {
457
459
TokenTree :: MetaVarDecl ( _, bind, name) => {
458
460
format ! ( "{} ('{}')" , name, bind)
459
461
}
@@ -462,33 +464,33 @@ pub fn parse(sess: &ParseSess,
462
464
463
465
return Error ( parser. span , format ! (
464
466
"local ambiguity: multiple parsing options: {}" ,
465
- match next_eis . len( ) {
467
+ match next_items . len( ) {
466
468
0 => format!( "built-in NTs {}." , nts) ,
467
469
1 => format!( "built-in NTs {} or 1 other option." , nts) ,
468
470
n => format!( "built-in NTs {} or {} other options." , nts, n) ,
469
471
}
470
472
) ) ;
471
- } else if bb_eis . is_empty ( ) && next_eis . is_empty ( ) {
473
+ } else if bb_items . is_empty ( ) && next_items . is_empty ( ) {
472
474
return Failure ( parser. span , parser. token ) ;
473
- } else if !next_eis . is_empty ( ) {
475
+ } else if !next_items . is_empty ( ) {
474
476
/* Now process the next token */
475
- cur_eis . extend ( next_eis . drain ( ..) ) ;
477
+ cur_items . extend ( next_items . drain ( ..) ) ;
476
478
parser. bump ( ) ;
477
- } else /* bb_eis .len() == 1 */ {
478
- let mut ei = bb_eis . pop ( ) . unwrap ( ) ;
479
- if let TokenTree :: MetaVarDecl ( span, _, ident) = ei . top_elts . get_tt ( ei . idx ) {
480
- let match_cur = ei . match_cur ;
481
- ei . push_match ( match_cur,
479
+ } else /* bb_items .len() == 1 */ {
480
+ let mut item = bb_items . pop ( ) . unwrap ( ) ;
481
+ if let TokenTree :: MetaVarDecl ( span, _, ident) = item . top_elts . get_tt ( item . idx ) {
482
+ let match_cur = item . match_cur ;
483
+ item . push_match ( match_cur,
482
484
MatchedNonterminal ( Rc :: new ( parse_nt ( & mut parser, span, & ident. name . as_str ( ) ) ) ) ) ;
483
- ei . idx += 1 ;
484
- ei . match_cur += 1 ;
485
+ item . idx += 1 ;
486
+ item . match_cur += 1 ;
485
487
} else {
486
488
unreachable ! ( )
487
489
}
488
- cur_eis . push ( ei ) ;
490
+ cur_items . push ( item ) ;
489
491
}
490
492
491
- assert ! ( !cur_eis . is_empty( ) ) ;
493
+ assert ! ( !cur_items . is_empty( ) ) ;
492
494
}
493
495
}
494
496
0 commit comments