Skip to content

Commit 00a2f4c

Browse files
committed
fix: ecma ranges with set terminator
Fix ECMAScript handling of an unescaped literal '-' when it is followed by a predefined character set (for example `[a-\s]`). Also: * Fixed missing error check on parseProperty() call. * Use addChar(ch) helper instead of addRange(ch, ch). Fixes #54
1 parent 3511044 commit 00a2f4c

File tree

4 files changed

+95
-7
lines changed

4 files changed

+95
-7
lines changed

go.mod

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
module github.com/dlclark/regexp2
22

33
go 1.13
4+
5+
require github.com/stretchr/testify v1.8.1

go.sum

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
3+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
5+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
6+
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
7+
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
8+
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
9+
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
10+
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
11+
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
12+
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
13+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
14+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
15+
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
16+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
17+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

regexp_ecma_test.go

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package regexp2_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/dlclark/regexp2"
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
func TestECMA_basic(t *testing.T) {
11+
tests := map[string]struct {
12+
expr string
13+
data string
14+
want []string
15+
}{
16+
"charset": {
17+
expr: `[a-c]`,
18+
data: "abcd",
19+
want: []string{"a", "b", "c"},
20+
},
21+
"charset-set": {
22+
expr: `[a-\s]`,
23+
data: "a-b cd",
24+
want: []string{"a", "-", " "},
25+
},
26+
}
27+
28+
for name, tt := range tests {
29+
t.Run(name, func(t *testing.T) {
30+
re, err := regexp2.Compile(tt.expr, regexp2.ECMAScript)
31+
require.NoError(t, err)
32+
33+
match, err := re.FindStringMatch(tt.data)
34+
require.NoError(t, err)
35+
36+
var res []string
37+
for match != nil {
38+
for _, g := range match.Groups() {
39+
for _, c := range g.Captures {
40+
res = append(res, c.String())
41+
}
42+
}
43+
44+
match, err = re.FindNextMatch(match)
45+
require.NoError(t, err)
46+
}
47+
require.Equal(t, tt.want, res)
48+
})
49+
}
50+
}

syntax/parser.go

+26-7
Original file line numberDiff line numberDiff line change
@@ -1427,7 +1427,7 @@ func (p *parser) scanCapname() string {
14271427
return string(p.pattern[startpos:p.textpos()])
14281428
}
14291429

1430-
//Scans contents of [] (not including []'s), and converts to a set.
1430+
// Scans contents of [] (not including []'s), and converts to a set.
14311431
func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
14321432
ch := '\x00'
14331433
chPrev := '\x00'
@@ -1467,7 +1467,11 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
14671467
case 'D', 'd':
14681468
if !scanOnly {
14691469
if inRange {
1470-
return nil, p.getErr(ErrBadClassInCharRange, ch)
1470+
if !p.useOptionE() {
1471+
return nil, p.getErr(ErrBadClassInCharRange, ch)
1472+
}
1473+
cc.addChar('-')
1474+
cc.addChar(chPrev)
14711475
}
14721476
cc.addDigit(p.useOptionE() || p.useRE2(), ch == 'D', p.patternRaw)
14731477
}
@@ -1476,7 +1480,11 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
14761480
case 'S', 's':
14771481
if !scanOnly {
14781482
if inRange {
1479-
return nil, p.getErr(ErrBadClassInCharRange, ch)
1483+
if !p.useOptionE() {
1484+
return nil, p.getErr(ErrBadClassInCharRange, ch)
1485+
}
1486+
cc.addChar('-')
1487+
cc.addChar(chPrev)
14801488
}
14811489
cc.addSpace(p.useOptionE(), p.useRE2(), ch == 'S')
14821490
}
@@ -1485,7 +1493,11 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
14851493
case 'W', 'w':
14861494
if !scanOnly {
14871495
if inRange {
1488-
return nil, p.getErr(ErrBadClassInCharRange, ch)
1496+
if !p.useOptionE() {
1497+
return nil, p.getErr(ErrBadClassInCharRange, ch)
1498+
}
1499+
cc.addChar('-')
1500+
cc.addChar(chPrev)
14891501
}
14901502

14911503
cc.addWord(p.useOptionE() || p.useRE2(), ch == 'W')
@@ -1495,22 +1507,29 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
14951507
case 'p', 'P':
14961508
if !scanOnly {
14971509
if inRange {
1498-
return nil, p.getErr(ErrBadClassInCharRange, ch)
1510+
if !p.useOptionE() {
1511+
return nil, p.getErr(ErrBadClassInCharRange, ch)
1512+
}
1513+
cc.addChar('-')
1514+
cc.addChar(chPrev)
14991515
}
15001516
prop, err := p.parseProperty()
15011517
if err != nil {
15021518
return nil, err
15031519
}
15041520
cc.addCategory(prop, (ch != 'p'), caseInsensitive, p.patternRaw)
15051521
} else {
1506-
p.parseProperty()
1522+
_, err := p.parseProperty()
1523+
if err != nil {
1524+
return nil, err
1525+
}
15071526
}
15081527

15091528
continue
15101529

15111530
case '-':
15121531
if !scanOnly {
1513-
cc.addRange(ch, ch)
1532+
cc.addChar(ch)
15141533
}
15151534
continue
15161535

0 commit comments

Comments
 (0)