forked from mvdan/sh
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.go
2486 lines (2340 loc) · 58.5 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) 2016, Daniel Martí <[email protected]>
// See LICENSE for licensing information
package syntax
import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
	"unicode/utf8"
)
// ParserOption is a function which can be passed to NewParser
// to alter its behavior. To apply an option to an existing Parser,
// call it directly, for example KeepComments(true)(parser).
type ParserOption func(*Parser)
// KeepComments makes the parser parse comments and attach them to
// nodes, as opposed to discarding them.
func KeepComments(enabled bool) ParserOption {
	return func(parser *Parser) {
		parser.keepComments = enabled
	}
}
// LangVariant describes a shell language variant to use when tokenizing and
// parsing shell code. The zero value is LangBash.
type LangVariant int

const (
	// LangBash corresponds to the GNU Bash language, as described in its
	// manual at https://www.gnu.org/software/bash/manual/bash.html.
	//
	// We currently follow Bash version 5.2.
	//
	// Its string representation is "bash".
	LangBash LangVariant = iota

	// LangPOSIX corresponds to the POSIX Shell language, as described at
	// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html.
	//
	// Its string representation is "posix" or "sh".
	LangPOSIX

	// LangMirBSDKorn corresponds to the MirBSD Korn Shell, also known as
	// mksh, as described at http://www.mirbsd.org/htman/i386/man1/mksh.htm.
	// Note that it shares some features with Bash, due to the shared
	// ancestry that is ksh.
	//
	// We currently follow mksh version 59.
	//
	// Its string representation is "mksh".
	LangMirBSDKorn

	// LangBats corresponds to the Bash Automated Testing System language,
	// as described at https://github.com/bats-core/bats-core. Note that
	// it's just a small extension of the Bash language.
	//
	// Its string representation is "bats".
	LangBats

	// LangAuto corresponds to automatic language detection,
	// commonly used by end-user applications like shfmt,
	// which can guess a file's language variant given its filename or shebang.
	//
	// At this time, the Parser does not support LangAuto.
	LangAuto
)
// Variant changes the shell language variant that the parser will
// accept.
//
// The passed language variant must be one of the constant values defined in
// this package; anything else panics.
func Variant(l LangVariant) ParserOption {
	switch l {
	case LangAuto:
		panic("LangAuto is not supported by the parser at this time")
	case LangBash, LangPOSIX, LangMirBSDKorn, LangBats:
		// Supported variants; accepted as-is.
	default:
		panic(fmt.Sprintf("unknown shell language variant: %d", l))
	}
	return func(p *Parser) { p.lang = l }
}
// String returns the canonical name of the language variant, such as
// "bash" or "mksh", or a fallback message for unknown values.
func (l LangVariant) String() string {
	name, ok := map[LangVariant]string{
		LangBash:       "bash",
		LangPOSIX:      "posix",
		LangMirBSDKorn: "mksh",
		LangBats:       "bats",
		LangAuto:       "auto",
	}[l]
	if !ok {
		return "unknown shell language variant"
	}
	return name
}
// Set assigns the variant named by s, accepting the same names that
// String produces plus "sh" as an alias for "posix". It implements
// the flag.Value interface.
func (l *LangVariant) Set(s string) error {
	v, ok := map[string]LangVariant{
		"bash":  LangBash,
		"posix": LangPOSIX,
		"sh":    LangPOSIX,
		"mksh":  LangMirBSDKorn,
		"bats":  LangBats,
		"auto":  LangAuto,
	}[s]
	if !ok {
		return fmt.Errorf("unknown shell language variant: %q", s)
	}
	*l = v
	return nil
}
// isBash reports whether the variant is Bash or the Bash-derived Bats.
func (l LangVariant) isBash() bool {
	switch l {
	case LangBash, LangBats:
		return true
	}
	return false
}
// StopAt configures the lexer to stop at an arbitrary word, treating it
// as if it were the end of the input. It can contain any characters
// except whitespace, and cannot be over four bytes in size.
//
// This can be useful to embed shell code within another language, as
// one can use a special word to mark the delimiters between the two.
//
// As a word, it will only apply when following whitespace or a
// separating token. For example, StopAt("$$") will act on the inputs
// "foo $$" and "foo;$$", but not on "foo '$$'".
//
// The match is done by prefix, so the example above will also act on
// "foo $$bar".
func StopAt(word string) ParserOption {
	switch {
	case len(word) > 4:
		panic("stop word can't be over four bytes in size")
	case strings.ContainsAny(word, " \t\n\r"):
		panic("stop word can't contain whitespace characters")
	}
	return func(p *Parser) { p.stopAt = []byte(word) }
}
// NewParser allocates a new Parser and applies any number of options.
func NewParser(options ...ParserOption) *Parser {
	parser := &Parser{}
	for _, apply := range options {
		apply(parser)
	}
	return parser
}
// Parse reads and parses a shell program with an optional name. It
// returns the parsed program if no issues were encountered. Otherwise,
// an error is returned. Reads from r are buffered.
//
// Parse can be called more than once, but not concurrently. That is, a
// Parser can be reused once it is done working.
func (p *Parser) Parse(r io.Reader, name string) (*File, error) {
	p.reset()
	p.f = &File{Name: name}
	p.src = r
	p.rune() // load the first rune
	p.next() // load the first token
	p.f.Stmts, p.f.Last = p.stmtList()
	if p.err == nil {
		// EOF immediately after heredoc word so no newline to
		// trigger it
		p.doHeredocs()
	}
	return p.f, p.err
}
// Stmts reads and parses statements one at a time, calling a function
// each time one is parsed. If the function returns false, parsing is
// stopped and the function is not called again.
func (p *Parser) Stmts(r io.Reader, fn func(*Stmt) bool) error {
	p.reset()
	p.f = &File{}
	p.src = r
	p.rune() // load the first rune
	p.next() // load the first token
	p.stmts(fn)
	if p.err == nil {
		// EOF immediately after heredoc word so no newline to
		// trigger it
		p.doHeredocs()
	}
	return p.err
}
// wrappedReader is used by Interactive to observe the parser's reads,
// so that the callback can also fire at line boundaries that Stmts
// alone would not report (incomplete statements, empty lines).
type wrappedReader struct {
	*Parser
	io.Reader

	lastLine    int                // last line for which a callback was issued
	accumulated []*Stmt            // statements fully parsed on the current line
	fn          func([]*Stmt) bool // the Interactive callback
}
// Read forwards to the underlying reader, but first checks whether the
// lexer just finished a line, in which case it may invoke the callback
// for the edge cases not covered by Parser.Stmts. Returning io.EOF when
// the callback declines stops the parse.
func (w *wrappedReader) Read(p []byte) (n int, err error) {
	// If we lexed a newline for the first time, we just finished a line, so
	// we may need to give a callback for the edge cases below not covered
	// by Parser.Stmts.
	if (w.r == '\n' || w.r == escNewl) && w.line > w.lastLine {
		if w.Incomplete() {
			// Incomplete statement; call back to print "> ".
			if !w.fn(w.accumulated) {
				return 0, io.EOF
			}
		} else if len(w.accumulated) == 0 {
			// Nothing was parsed; call back to print another "$ ".
			if !w.fn(nil) {
				return 0, io.EOF
			}
		}
		w.lastLine = w.line
	}
	return w.Reader.Read(p)
}
// Interactive implements what is necessary to parse statements in an
// interactive shell. The parser will call the given function under two
// circumstances outlined below.
//
// If a line containing any number of statements is parsed, the function will be
// called with said statements.
//
// If a line ending in an incomplete statement is parsed, the function will be
// called with any fully parsed statements, and [Parser.Incomplete] will return true.
//
// One can imagine a simple interactive shell implementation as follows:
//
//	fmt.Fprintf(os.Stdout, "$ ")
//	parser.Interactive(os.Stdin, func(stmts []*syntax.Stmt) bool {
//		if parser.Incomplete() {
//			fmt.Fprintf(os.Stdout, "> ")
//			return true
//		}
//		run(stmts)
//		fmt.Fprintf(os.Stdout, "$ ")
//		return true
//	})
//
// If the callback function returns false, parsing is stopped and the function
// is not called again.
func (p *Parser) Interactive(r io.Reader, fn func([]*Stmt) bool) error {
	w := wrappedReader{Parser: p, Reader: r, fn: fn}
	return p.Stmts(&w, func(stmt *Stmt) bool {
		w.accumulated = append(w.accumulated, stmt)
		// We finished parsing a statement and we're at a newline token,
		// so we finished fully parsing a number of statements. Call
		// back to run the statements and print "$ ".
		if p.tok == _Newl {
			if !fn(w.accumulated) {
				return false
			}
			w.accumulated = w.accumulated[:0]
			// The callback above would already print "$ ", so we
			// don't want the subsequent wrappedReader.Read to cause
			// another "$ " print thinking that nothing was parsed.
			w.lastLine = w.line + 1
		}
		return true
	})
}
// Words reads and parses words one at a time, calling a function each time one
// is parsed. If the function returns false, parsing is stopped and the function
// is not called again.
//
// Newlines are skipped, meaning that multi-line input will work fine. If the
// parser encounters a token that isn't a word, such as a semicolon, an error
// will be returned.
//
// Note that the lexer doesn't currently tokenize spaces, so it may need to read
// a non-space byte such as a newline or a letter before finishing the parsing
// of a word. This will be fixed in the future.
func (p *Parser) Words(r io.Reader, fn func(*Word) bool) error {
	p.reset()
	p.f = &File{}
	p.src = r
	p.rune() // load the first rune
	p.next() // load the first token
	for {
		p.got(_Newl) // skip any number of newlines between words
		w := p.getWord()
		if w == nil {
			// Either a real parse error, or clean EOF.
			if p.tok != _EOF {
				p.curErr("%s is not a valid word", p.tok)
			}
			return p.err
		}
		if !fn(w) {
			return nil
		}
	}
}
// Document parses a single here-document word. That is, it parses the input as
// if they were lines following a <<EOF redirection.
//
// In practice, this is the same as parsing the input as if it were within
// double quotes, but without having to escape all double quote characters.
// Similarly, the here-document word parsed here cannot be ended by any
// delimiter other than reaching the end of the input.
func (p *Parser) Document(r io.Reader) (*Word, error) {
	p.reset()
	p.f = &File{}
	p.src = r
	p.rune()
	p.quote = hdocBody
	// Use a stop word that should never occur in practice, so the body
	// only ends at EOF.
	p.hdocStops = [][]byte{[]byte("MVDAN_CC_SH_SYNTAX_EOF")}
	p.parsingDoc = true
	p.next()
	w := p.getWord()
	return w, p.err
}
// Arithmetic parses a single arithmetic expression. That is, as if the input
// were within the $(( and )) tokens.
func (p *Parser) Arithmetic(r io.Reader) (ArithmExpr, error) {
	p.reset()
	p.f = &File{}
	p.src = r
	p.rune()
	p.quote = arithmExpr // lex as if inside $(( ))
	p.next()
	expr := p.arithmExpr(false)
	return expr, p.err
}
// Parser holds the internal state of the parsing mechanism of a
// program.
type Parser struct {
	src io.Reader
	bs  []byte // current chunk of read bytes
	bsp int    // pos within chunk for the rune after r
	r   rune   // next rune
	w   int    // width of r

	f *File // the file being parsed

	spaced bool // whether tok has whitespace on its left

	err     error // lexer/parser error
	readErr error // got a read error, but bytes left

	tok token  // current token
	val string // current value (valid if tok is _Lit*)

	// position of r, to be converted to Parser.pos later
	offs, line, col int

	pos Pos // position of tok

	// TODO: Guard against offset overflow too. Less likely as it's 32-bit,
	// whereas line and col are 16-bit.
	lineOverflow bool
	colOverflow  bool

	quote   quoteState // current lexer state
	eqlOffs int        // position of '=' in val (a literal)

	keepComments bool        // set via KeepComments
	lang         LangVariant // set via Variant

	stopAt []byte // set via StopAt; non-empty means stop lexing at this word

	forbidNested bool // when set, expansions are rejected (heredoc words)

	// list of pending heredoc bodies
	buriedHdocs int
	heredocs    []*Redirect
	hdocStops   [][]byte // stack of end words for open heredocs

	parsingDoc bool // true if using Parser.Document

	// openStmts is how many entire statements we're currently parsing. A
	// non-zero number means that we require certain tokens or words before
	// reaching EOF.
	openStmts int

	// openBquotes is how many levels of backquotes are open at the moment.
	openBquotes int

	// lastBquoteEsc is how many times the last backquote token was escaped
	lastBquoteEsc int

	// NOTE(review): these two appear to track regex parsing state within
	// test expressions ([[ ... =~ ... ]]) — confirm against the lexer.
	rxOpenParens int
	rxFirstPart  bool

	accComs []Comment  // comments accumulated and not yet attached to a node
	curComs *[]Comment // where newly lexed comments are appended

	// batch allocators, to reduce the number of small heap allocations
	litBatch  []Lit
	wordBatch []wordAlloc
	stmtBatch []Stmt
	callBatch []callAlloc

	readBuf [bufSize]byte // buffer for reads from src
	litBuf  [bufSize]byte // initial backing storage for litBs
	litBs   []byte        // bytes of the literal being lexed; nil when not in one
}
// Incomplete reports whether the parser is waiting to read more bytes because
// it needs to finish properly parsing a statement.
//
// It is only safe to call while the parser is blocked on a read. For an example
// use case, see [Parser.Interactive].
func (p *Parser) Incomplete() bool {
	switch {
	case p.quote != noState:
		// Parsing a node such as a double-quoted string.
		return true
	case p.openStmts > 0:
		// Open statements need to be finished.
		return true
	case p.litBs != nil:
		// A literal is being constructed.
		return true
	}
	return false
}
// bufSize is the size of the parser's read and literal buffers.
const bufSize = 1 << 10

// reset returns the parser to a clean initial state, so that it can be
// reused for a new input. Note that the heredocs slice keeps its
// backing storage for reuse.
func (p *Parser) reset() {
	p.tok, p.val = illegalTok, ""
	p.eqlOffs = 0
	p.bs, p.bsp = nil, 0
	p.offs, p.line, p.col = 0, 1, 1
	p.r, p.w = 0, 0
	p.err, p.readErr = nil, nil
	p.quote, p.forbidNested = noState, false
	p.openStmts = 0
	p.heredocs, p.buriedHdocs = p.heredocs[:0], 0
	p.parsingDoc = false
	p.openBquotes = 0
	p.accComs, p.curComs = nil, &p.accComs
	p.litBatch = nil
	p.wordBatch = nil
	p.stmtBatch = nil
	p.callBatch = nil
}
// nextPos builds the position of the current rune r from the parser's
// offset, line, and column counters. An overflowed line or column is
// reported as zero.
func (p *Parser) nextPos() Pos {
	// TODO: detect offset overflow while lexing as well.
	var line, col uint
	if !p.lineOverflow {
		line = uint(p.line)
	}
	if !p.colOverflow {
		col = uint(p.col)
	}
	return NewPos(uint(p.offs+p.bsp-p.w), line, col)
}
// lit takes a *Lit from the batch allocator and fills it with the given
// start position and value; the end position is the parser's current one.
func (p *Parser) lit(pos Pos, val string) *Lit {
	if len(p.litBatch) == 0 {
		p.litBatch = make([]Lit, 64)
	}
	next := &p.litBatch[0]
	p.litBatch = p.litBatch[1:]
	*next = Lit{
		ValuePos: pos,
		ValueEnd: p.nextPos(),
		Value:    val,
	}
	return next
}
// wordAlloc batch-allocates a Word together with backing storage for a
// single WordPart, covering the common case of one-part words without a
// second allocation.
type wordAlloc struct {
	word  Word
	parts [1]WordPart
}
// wordAnyNumber parses and returns a word made of any number of parts,
// using the batch allocator for the Word itself.
func (p *Parser) wordAnyNumber() *Word {
	if len(p.wordBatch) == 0 {
		p.wordBatch = make([]wordAlloc, 32)
	}
	next := &p.wordBatch[0]
	p.wordBatch = p.wordBatch[1:]
	word := &next.word
	word.Parts = p.wordParts(next.parts[:0])
	return word
}
// wordOne returns a batch-allocated Word holding exactly the given part.
func (p *Parser) wordOne(part WordPart) *Word {
	if len(p.wordBatch) == 0 {
		p.wordBatch = make([]wordAlloc, 32)
	}
	next := &p.wordBatch[0]
	p.wordBatch = p.wordBatch[1:]
	word := &next.word
	next.parts[0] = part
	word.Parts = next.parts[:1]
	return word
}
// stmt returns a batch-allocated Stmt starting at the given position.
func (p *Parser) stmt(pos Pos) *Stmt {
	if len(p.stmtBatch) == 0 {
		p.stmtBatch = make([]Stmt, 32)
	}
	next := &p.stmtBatch[0]
	p.stmtBatch = p.stmtBatch[1:]
	next.Position = pos
	return next
}
// callAlloc batch-allocates a CallExpr together with backing storage for
// up to four argument words, avoiding a second allocation for most calls.
type callAlloc struct {
	ce CallExpr
	ws [4]*Word
}
// call returns a batch-allocated CallExpr whose first argument is w.
func (p *Parser) call(w *Word) *CallExpr {
	if len(p.callBatch) == 0 {
		p.callBatch = make([]callAlloc, 32)
	}
	next := &p.callBatch[0]
	p.callBatch = p.callBatch[1:]
	expr := &next.ce
	next.ws[0] = w
	expr.Args = next.ws[:1]
	return expr
}
//go:generate stringer -type=quoteState

// quoteState is a bitmask describing the lexer's current quoting or
// nesting context, which changes what tokens are produced.
type quoteState uint32

const (
	noState quoteState = 1 << iota
	subCmd
	subCmdBckquo
	dblQuotes
	hdocWord
	hdocBody
	hdocBodyTabs
	arithmExpr
	arithmExprLet
	arithmExprCmd
	arithmExprBrack
	testExpr
	testExprRegexp
	switchCase
	paramExpName
	paramExpSlice
	paramExpRepl
	paramExpExp
	arrayElems

	// allKeepSpaces groups the states in which whitespace is significant.
	allKeepSpaces = paramExpRepl | dblQuotes | hdocBody |
		hdocBodyTabs | paramExpExp
	// allRegTokens groups the states in which regular tokens are lexed.
	allRegTokens = noState | subCmd | subCmdBckquo | hdocWord |
		switchCase | arrayElems | testExpr
	// allArithmExpr groups the arithmetic-expression states.
	allArithmExpr = arithmExpr | arithmExprLet | arithmExprCmd |
		arithmExprBrack | paramExpSlice
	// allParamReg groups the plain parameter-expansion states.
	allParamReg = paramExpName | paramExpSlice
	// allParamExp groups every parameter-expansion state.
	allParamExp = allParamReg | paramExpRepl | paramExpExp | arithmExprBrack
)
// saveState holds the lexer state replaced by preNested, so that
// postNested can restore it afterwards.
type saveState struct {
	quote       quoteState
	buriedHdocs int
}
// preNested switches the lexer to the given quote state for a nested
// node, burying any pending heredocs; the previous state is returned
// for a later postNested call.
func (p *Parser) preNested(quote quoteState) saveState {
	saved := saveState{quote: p.quote, buriedHdocs: p.buriedHdocs}
	p.buriedHdocs = len(p.heredocs)
	p.quote = quote
	return saved
}
// postNested restores the state saved by a matching preNested call.
func (p *Parser) postNested(s saveState) {
	p.quote = s.quote
	p.buriedHdocs = s.buriedHdocs
}
// unquotedWordBytes returns the bytes of w with any quoting removed,
// plus a flag reporting whether any unquoting took place. Used for
// heredoc end words, where quoting disables expansion.
func (p *Parser) unquotedWordBytes(w *Word) ([]byte, bool) {
	buf := make([]byte, 0, 4)
	didUnquote := false
	for _, wp := range w.Parts {
		// NOTE(review): didUnquote is overwritten on every iteration, so
		// only the last part's quoting status is reported — confirm that
		// this is intended rather than an OR over all parts.
		buf, didUnquote = p.unquotedWordPart(buf, wp, false)
	}
	return buf, didUnquote
}
// unquotedWordPart appends the unquoted form of wp to buf, reporting
// whether any quoting was removed. quotes is true when already inside
// double quotes, where backslashes are kept as-is.
func (p *Parser) unquotedWordPart(buf []byte, wp WordPart, quotes bool) (_ []byte, quoted bool) {
	switch x := wp.(type) {
	case *Lit:
		for i := 0; i < len(x.Value); i++ {
			if b := x.Value[i]; b == '\\' && !quotes {
				// Drop the backslash, keeping the escaped byte.
				if i++; i < len(x.Value) {
					buf = append(buf, x.Value[i])
				}
				quoted = true
			} else {
				buf = append(buf, b)
			}
		}
	case *SglQuoted:
		buf = append(buf, []byte(x.Value)...)
		quoted = true
	case *DblQuoted:
		for _, wp2 := range x.Parts {
			buf, _ = p.unquotedWordPart(buf, wp2, true)
		}
		quoted = true
	}
	return buf, quoted
}
// doHeredocs lexes the pending here-document bodies, filling in the
// Hdoc field of each pending redirect. It must be called with p.tok at
// the newline that triggers the bodies.
func (p *Parser) doHeredocs() {
	hdocs := p.heredocs[p.buriedHdocs:]
	if len(hdocs) == 0 {
		// Nothing to do; don't even issue a read.
		return
	}
	p.rune() // consume '\n', since we know p.tok == _Newl
	old := p.quote
	p.heredocs = p.heredocs[:p.buriedHdocs]
	for i, r := range hdocs {
		if p.err != nil {
			break
		}
		p.quote = hdocBody
		if r.Op == DashHdoc {
			// <<- strips leading tabs from body lines.
			p.quote = hdocBodyTabs
		}
		stop, quoted := p.unquotedWordBytes(r.Word)
		p.hdocStops = append(p.hdocStops, stop)
		if i > 0 && p.r == '\n' {
			p.rune()
		}
		lastLine := p.line
		if quoted {
			// A quoted end word disables expansions in the body.
			r.Hdoc = p.quotedHdocWord()
		} else {
			p.next()
			r.Hdoc = p.getWord()
		}
		if r.Hdoc != nil {
			lastLine = int(r.Hdoc.End().Line())
		}
		if lastLine < p.line {
			// TODO: It seems like this triggers more often than it
			// should. Look into it.
			l := p.lit(p.nextPos(), "")
			if r.Hdoc == nil {
				r.Hdoc = p.wordOne(l)
			} else {
				r.Hdoc.Parts = append(r.Hdoc.Parts, l)
			}
		}
		if stop := p.hdocStops[len(p.hdocStops)-1]; stop != nil {
			// The lexer nils out the stop once it is found; a
			// non-nil stop here means the body never ended.
			p.posErr(r.Pos(), "unclosed here-document '%s'", stop)
		}
		p.hdocStops = p.hdocStops[:len(p.hdocStops)-1]
	}
	p.quote = old
}
// got consumes the current token if it matches tok, reporting whether
// it did so.
func (p *Parser) got(tok token) bool {
	if p.tok != tok {
		return false
	}
	p.next()
	return true
}
// gotRsrv consumes the current token if it is the literal word val,
// returning its position and whether it was consumed.
func (p *Parser) gotRsrv(val string) (Pos, bool) {
	pos := p.pos
	if p.tok != _LitWord || p.val != val {
		return pos, false
	}
	p.next()
	return pos, true
}
// readableStr prepares s for use in an error message: words (starting
// with a lowercase letter) are quoted, while operator tokens like & or
// } are left as-is.
func readableStr(s string) string {
	if s == "" {
		return s
	}
	if c := s[0]; c < 'a' || c > 'z' {
		return s
	}
	return strconv.Quote(s)
}
// followErr records an error saying that left, at pos, must be followed
// by right.
func (p *Parser) followErr(pos Pos, left, right string) {
	p.posErr(pos, "%s must be followed by %s", readableStr(left), right)
}
// followErrExp is a shorthand for followErr in contexts that require an
// expression to follow.
func (p *Parser) followErrExp(pos Pos, left string) {
	p.followErr(pos, left, "an expression")
}
// follow consumes the token that must come after left, recording a
// follow error at lpos otherwise.
func (p *Parser) follow(lpos Pos, left string, tok token) {
	if p.got(tok) {
		return
	}
	p.followErr(lpos, left, tok.String())
}
// followRsrv consumes the reserved word that must come after left,
// recording a follow error at lpos otherwise. The word's position is
// returned either way.
func (p *Parser) followRsrv(lpos Pos, left, val string) Pos {
	pos, ok := p.gotRsrv(val)
	if ok {
		return pos
	}
	p.followErr(lpos, left, fmt.Sprintf("%q", val))
	return pos
}
// followStmts parses the statement list that must follow a construct
// such as "then" or "do". A leading semicolon yields an empty list; a
// leading newline permits an empty list; otherwise an empty list is a
// follow error recorded against lpos/left.
func (p *Parser) followStmts(left string, lpos Pos, stops ...string) ([]*Stmt, []Comment) {
	if p.got(semicolon) {
		return nil, nil
	}
	newLine := p.got(_Newl)
	stmts, last := p.stmtList(stops...)
	if len(stmts) < 1 && !newLine {
		p.followErr(lpos, left, "a statement list")
	}
	return stmts, last
}
// followWordTok parses the word that must follow the token at pos,
// recording a follow error if none is present.
func (p *Parser) followWordTok(tok token, pos Pos) *Word {
	word := p.getWord()
	if word == nil {
		p.followErr(pos, tok.String(), "a word")
	}
	return word
}
// stmtEnd consumes the reserved word that closes statement n (e.g.
// "fi" for "if"), recording an error at n's position if it is missing.
func (p *Parser) stmtEnd(n Node, start, end string) Pos {
	pos, ok := p.gotRsrv(end)
	if ok {
		return pos
	}
	p.posErr(n.Pos(), "%s statement must end with %q", start, end)
	return pos
}
// quoteErr records an error for a quote opened at lpos that was never
// closed before the current token.
func (p *Parser) quoteErr(lpos Pos, quote token) {
	p.posErr(lpos, "reached %s without closing quote %s",
		p.tok.String(), quote)
}
// matchingErr records an error for an opening token left at lpos that
// was never matched by its closing counterpart right.
func (p *Parser) matchingErr(lpos Pos, left, right any) {
	p.posErr(lpos, "reached %s without matching %s with %s",
		p.tok.String(), left, right)
}
// matched consumes the closing token right for the opening token left
// at lpos, recording a matching error if it is absent. The closing
// token's position is returned.
func (p *Parser) matched(lpos Pos, left, right token) Pos {
	pos := p.pos
	if ok := p.got(right); !ok {
		p.matchingErr(lpos, left, right)
	}
	return pos
}
// errPass records the first error encountered, then poisons the lexer
// state so that all further reads and tokens immediately report EOF.
func (p *Parser) errPass(err error) {
	if p.err == nil {
		p.err = err
		p.bsp = len(p.bs) + 1 // skip the rest of the current chunk
		p.r = utf8.RuneSelf
		p.w = 1
		p.tok = _EOF
	}
}
// IsIncomplete reports whether a Parser error could have been avoided with
// extra input bytes. For example, if an [io.EOF] was encountered while there was
// an unclosed quote or parenthesis.
//
// The error chain is inspected via [errors.As], so a ParseError wrapped
// with fmt.Errorf("...: %w", err) is still recognized; the previous
// implementation used a direct type assertion, which reported false for
// any wrapped ParseError.
func IsIncomplete(err error) bool {
	var perr ParseError
	return errors.As(err, &perr) && perr.Incomplete
}
// IsKeyword returns true if the given word is part of the language keywords.
func IsKeyword(word string) bool {
	// The keyword list was copied from the bash 5.1 source code, file
	// y.tab.c +4460. Some of the entries below only exist when bash is
	// built with the corresponding feature flag: "[[" and "]]" need
	// COND_COMMAND, "coproc" needs COPROCESS_SUPPORT, "select" needs
	// SELECT_COMMAND, and "time" needs COMMAND_TIMING.
	switch word {
	case "!", "{", "}", "[[", "]]",
		"case", "coproc", "do", "done", "else", "esac",
		"fi", "for", "function", "if", "in", "select",
		"then", "time", "until", "while":
		return true
	default:
		return false
	}
}
// ParseError represents an error found when parsing a source file, from which
// the parser cannot recover.
type ParseError struct {
	Filename string // name given to Parse; may be empty
	Pos      Pos    // position where the error was detected
	Text     string // human-readable description of the error
	// Incomplete is true when the error was caused by reaching EOF and
	// could have been avoided with more input; see IsIncomplete.
	Incomplete bool
}
// Error formats the error as "file:pos: text", omitting the file part
// when no filename was given.
func (e ParseError) Error() string {
	if e.Filename == "" {
		return e.Pos.String() + ": " + e.Text
	}
	return e.Filename + ":" + e.Pos.String() + ": " + e.Text
}
// LangError is returned when the parser encounters code that is only valid in
// other shell language variants. The error includes what feature is not present
// in the current language variant, and what languages support it.
type LangError struct {
	Filename string        // name given to Parse; may be empty
	Pos      Pos           // position of the offending code
	Feature  string        // the feature not supported by the current variant
	Langs    []LangVariant // the variants which do support the feature
}
// Error formats the error as e.g. "file:pos: feature is a bash/mksh
// feature", using "are" when the feature name is plural.
func (e LangError) Error() string {
	var sb strings.Builder
	if e.Filename != "" {
		sb.WriteString(e.Filename)
		sb.WriteString(":")
	}
	sb.WriteString(e.Pos.String())
	sb.WriteString(": ")
	sb.WriteString(e.Feature)
	// A trailing "s" means the feature name is plural.
	if strings.HasSuffix(e.Feature, "s") {
		sb.WriteString(" are a ")
	} else {
		sb.WriteString(" is a ")
	}
	for i, lang := range e.Langs {
		if i > 0 {
			sb.WriteString("/")
		}
		sb.WriteString(lang.String())
	}
	sb.WriteString(" feature")
	return sb.String()
}
// posErr records a ParseError at the given position. Incomplete is set
// when we are at EOF and more input could have avoided the error.
func (p *Parser) posErr(pos Pos, format string, a ...any) {
	p.errPass(ParseError{
		Filename:   p.f.Name,
		Pos:        pos,
		Text:       fmt.Sprintf(format, a...),
		Incomplete: p.tok == _EOF && p.Incomplete(),
	})
}
// curErr records a ParseError at the current token's position.
func (p *Parser) curErr(format string, a ...any) {
	p.posErr(p.pos, format, a...)
}
// langErr records a LangError for a feature at pos that is only
// supported by the given language variants.
func (p *Parser) langErr(pos Pos, feature string, langs ...LangVariant) {
	p.errPass(LangError{
		Filename: p.f.Name,
		Pos:      pos,
		Feature:  feature,
		Langs:    langs,
	})
}
// stmts parses statements until EOF, one of the stop words, or a token
// that closes the current nesting level (a closing parenthesis in a
// subshell, a closing backquote, or a case-clause separator in a case
// clause), calling fn for each statement. Parsing ends early if fn
// returns false.
func (p *Parser) stmts(fn func(*Stmt) bool, stops ...string) {
	gotEnd := true // whether the previous statement ended with a separator
loop:
	for p.tok != _EOF {
		newLine := p.got(_Newl)
		switch p.tok {
		case _LitWord:
			for _, stop := range stops {
				if p.val == stop {
					break loop
				}
			}
		case rightParen:
			if p.quote == subCmd {
				break loop
			}
		case bckQuote:
			if p.backquoteEnd() {
				break loop
			}
		case dblSemicolon, semiAnd, dblSemiAnd, semiOr:
			if p.quote == switchCase {
				break loop
			}
			p.curErr("%s can only be used in a case clause", p.tok)
		}
		if !newLine && !gotEnd {
			p.curErr("statements must be separated by &, ; or a newline")
		}
		if p.tok == _EOF {
			break
		}
		p.openStmts++
		s := p.getStmt(true, false, false)
		p.openStmts--
		if s == nil {
			p.invalidStmtStart()
			break
		}
		gotEnd = s.Semicolon.IsValid()
		if !fn(s) {
			break
		}
	}
}
// stmtList parses a list of statements via stmts, collecting them into
// a slice, and returns them along with any trailing comments that
// should stay attached to the list rather than a following token.
func (p *Parser) stmtList(stops ...string) ([]*Stmt, []Comment) {
	var stmts []*Stmt
	var last []Comment
	fn := func(s *Stmt) bool {
		stmts = append(stmts, s)
		return true
	}
	p.stmts(fn, stops...)
	split := len(p.accComs)
	if p.tok == _LitWord && (p.val == "elif" || p.val == "else" || p.val == "fi") {
		// Split the comments, so that any aligned with an opening token
		// get attached to it. For example:
		//
		//     if foo; then
		//         # inside the body
		//     # document the else
		//     else
		//     fi
		// TODO(mvdan): look into deduplicating this with similar logic
		// in caseItems.
		for i := len(p.accComs) - 1; i >= 0; i-- {
			c := p.accComs[i]
			if c.Pos().Col() != p.pos.Col() {
				break
			}
			split = i
		}
	}
	if split > 0 { // keep last nil if empty
		last = p.accComs[:split]
	}
	p.accComs = p.accComs[split:]
	return stmts, last
}
// invalidStmtStart records an error tailored to the token that cannot
// begin a statement.
func (p *Parser) invalidStmtStart() {
	switch p.tok {
	case rightParen:
		p.curErr("%s can only be used to close a subshell", p.tok)
	case semicolon, and, or, andAnd, orOr:
		p.curErr("%s can only immediately follow a statement", p.tok)
	default:
		p.curErr("%s is not a valid start for a statement", p.tok)
	}
}
// getWord parses a word, returning nil if no parts were found or if an
// error occurred along the way.
func (p *Parser) getWord() *Word {
	w := p.wordAnyNumber()
	if p.err != nil || len(w.Parts) == 0 {
		return nil
	}
	return w
}
// getLit consumes and returns the current token as a literal, or nil if
// the current token is not a literal kind.
func (p *Parser) getLit() *Lit {
	switch p.tok {
	case _Lit, _LitWord, _LitRedir:
	default:
		return nil
	}
	lit := p.lit(p.pos, p.val)
	p.next()
	return lit
}
// wordParts appends word parts to wps until a space or a non-part token
// is reached. An empty result is normalized to nil.
func (p *Parser) wordParts(wps []WordPart) []WordPart {
	for {
		part := p.wordPart()
		if part == nil {
			break
		}
		wps = append(wps, part)
		if p.spaced {
			break
		}
	}
	if len(wps) == 0 {
		return nil // normalize empty lists into nil
	}
	return wps
}
// ensureNoNested records an error when expansions are forbidden in the
// current context, such as within a heredoc end word.
func (p *Parser) ensureNoNested() {
	if !p.forbidNested {
		return
	}
	p.curErr("expansions not allowed in heredoc words")
}