Skip to content
This repository was archived by the owner on Jan 5, 2023. It is now read-only.

Commit 235b7c0

Browse files
authored
Merge pull request #395 from sauyon/regexp
SuspiciousCharacterInRegexp: Add fix for raw string literals
2 parents 33f4362 + 0950baf commit 235b7c0

9 files changed

+76
-18
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* The query "Suspicious characters in a regular expression" has been improved to recognize raw string literals, which should lead to fewer false positives.

ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ func broken(hostNames []byte) string {
99
} else {
1010
// This will be reached even if hostNames is exactly "forbidden.host.org",
1111
// because the literal backspace is not matched
12+
return ""
1213
}
1314
}

ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.ql

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,10 @@ import DataFlow::PathGraph
2020
*/
2121
predicate containsEscapedCharacter(DataFlow::Node source, string character) {
2222
character in ["a", "b"] and
23-
exists(
23+
exists(StringLit s | s = source.asExpr() |
2424
// Search for `character` preceded by an odd number of backslashes:
25-
source
26-
.asExpr()
27-
.(BasicLit)
28-
.getText()
29-
.regexpFind("(?<=(^|[^\\\\])\\\\(\\\\{2}){0,10})" + character, _, _)
25+
exists(s.getText().regexpFind("(?<=(^|[^\\\\])\\\\(\\\\{2}){0,10})" + character, _, _)) and
26+
not s.isRaw()
3027
)
3128
}
3229

ql/src/Security/CWE-020/SuspiciousCharacterInRegexpGood.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@ package main
33
import "regexp"
44

55
func fixed(hostNames []byte) string {
6-
var hostRe = regexp.MustCompile("\\bforbidden.host.org")
6+
var hostRe = regexp.MustCompile(`\bforbidden.host.org`)
77
if hostRe.Match(hostNames) {
88
return "Must not target forbidden.host.org"
99
} else {
1010
// hostNames definitely doesn't contain a word "forbidden.host.org", as "\\b"
1111
// is the start-of-word anchor, not a literal backspace.
12+
return ""
1213
}
1314
}

ql/src/semmle/go/Expr.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,8 @@ class RuneLit = CharLit;
343343
*/
344344
class StringLit extends @stringlit, BasicLit {
345345
override string getAPrimaryQlClass() { result = "StringLit" }
346+
347+
predicate isRaw() { this.getText().matches("`%`") }
346348
}
347349

348350
/**
Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,25 @@
11
edges
22
nodes
3-
| test.go:8:21:8:34 | "hello\\aworld" | semmle.label | "hello\\aworld" |
4-
| test.go:9:21:9:36 | "hello\\\\\\aworld" | semmle.label | "hello\\\\\\aworld" |
5-
| test.go:10:21:10:34 | "hello\\bworld" | semmle.label | "hello\\bworld" |
6-
| test.go:11:21:11:36 | "hello\\\\\\bworld" | semmle.label | "hello\\\\\\bworld" |
3+
| SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | semmle.label | "\\bforbidden.host.org" |
4+
| test.go:7:21:7:24 | "\\a" | semmle.label | "\\a" |
5+
| test.go:9:21:9:26 | "\\\\\\a" | semmle.label | "\\\\\\a" |
6+
| test.go:10:21:10:27 | "x\\\\\\a" | semmle.label | "x\\\\\\a" |
7+
| test.go:12:21:12:28 | "\\\\\\\\\\a" | semmle.label | "\\\\\\\\\\a" |
8+
| test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | semmle.label | "\\\\\\\\\\\\\\a" |
9+
| test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | semmle.label | "\\\\\\\\\\\\\\\\\\a" |
10+
| test.go:20:21:20:34 | "hello\\aworld" | semmle.label | "hello\\aworld" |
11+
| test.go:21:21:21:36 | "hello\\\\\\aworld" | semmle.label | "hello\\\\\\aworld" |
12+
| test.go:22:21:22:34 | "hello\\bworld" | semmle.label | "hello\\bworld" |
13+
| test.go:23:21:23:36 | "hello\\\\\\bworld" | semmle.label | "hello\\\\\\bworld" |
714
#select
8-
| test.go:8:21:8:34 | "hello\\aworld" | test.go:8:21:8:34 | "hello\\aworld" | test.go:8:21:8:34 | "hello\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:8:21:8:34 | "hello\\aworld" | A regular expression | test.go:8:21:8:34 | "hello\\aworld" | here |
9-
| test.go:9:21:9:36 | "hello\\\\\\aworld" | test.go:9:21:9:36 | "hello\\\\\\aworld" | test.go:9:21:9:36 | "hello\\\\\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:9:21:9:36 | "hello\\\\\\aworld" | A regular expression | test.go:9:21:9:36 | "hello\\\\\\aworld" | here |
10-
| test.go:10:21:10:34 | "hello\\bworld" | test.go:10:21:10:34 | "hello\\bworld" | test.go:10:21:10:34 | "hello\\bworld" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | test.go:10:21:10:34 | "hello\\bworld" | A regular expression | test.go:10:21:10:34 | "hello\\bworld" | here |
11-
| test.go:11:21:11:36 | "hello\\\\\\bworld" | test.go:11:21:11:36 | "hello\\\\\\bworld" | test.go:11:21:11:36 | "hello\\\\\\bworld" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | test.go:11:21:11:36 | "hello\\\\\\bworld" | A regular expression | test.go:11:21:11:36 | "hello\\\\\\bworld" | here |
15+
| SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | A regular expression | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | here |
16+
| test.go:7:21:7:24 | "\\a" | test.go:7:21:7:24 | "\\a" | test.go:7:21:7:24 | "\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:7:21:7:24 | "\\a" | A regular expression | test.go:7:21:7:24 | "\\a" | here |
17+
| test.go:9:21:9:26 | "\\\\\\a" | test.go:9:21:9:26 | "\\\\\\a" | test.go:9:21:9:26 | "\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:9:21:9:26 | "\\\\\\a" | A regular expression | test.go:9:21:9:26 | "\\\\\\a" | here |
18+
| test.go:10:21:10:27 | "x\\\\\\a" | test.go:10:21:10:27 | "x\\\\\\a" | test.go:10:21:10:27 | "x\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:10:21:10:27 | "x\\\\\\a" | A regular expression | test.go:10:21:10:27 | "x\\\\\\a" | here |
19+
| test.go:12:21:12:28 | "\\\\\\\\\\a" | test.go:12:21:12:28 | "\\\\\\\\\\a" | test.go:12:21:12:28 | "\\\\\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:12:21:12:28 | "\\\\\\\\\\a" | A regular expression | test.go:12:21:12:28 | "\\\\\\\\\\a" | here |
20+
| test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | A regular expression | test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | here |
21+
| test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | A regular expression | test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | here |
22+
| test.go:20:21:20:34 | "hello\\aworld" | test.go:20:21:20:34 | "hello\\aworld" | test.go:20:21:20:34 | "hello\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:20:21:20:34 | "hello\\aworld" | A regular expression | test.go:20:21:20:34 | "hello\\aworld" | here |
23+
| test.go:21:21:21:36 | "hello\\\\\\aworld" | test.go:21:21:21:36 | "hello\\\\\\aworld" | test.go:21:21:21:36 | "hello\\\\\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:21:21:21:36 | "hello\\\\\\aworld" | A regular expression | test.go:21:21:21:36 | "hello\\\\\\aworld" | here |
24+
| test.go:22:21:22:34 | "hello\\bworld" | test.go:22:21:22:34 | "hello\\bworld" | test.go:22:21:22:34 | "hello\\bworld" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | test.go:22:21:22:34 | "hello\\bworld" | A regular expression | test.go:22:21:22:34 | "hello\\bworld" | here |
25+
| test.go:23:21:23:36 | "hello\\\\\\bworld" | test.go:23:21:23:36 | "hello\\\\\\bworld" | test.go:23:21:23:36 | "hello\\\\\\bworld" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | test.go:23:21:23:36 | "hello\\\\\\bworld" | A regular expression | test.go:23:21:23:36 | "hello\\\\\\bworld" | here |
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package main
2+
3+
import "regexp"
4+
5+
func broken(hostNames []byte) string {
6+
var hostRe = regexp.MustCompile("\bforbidden.host.org")
7+
if hostRe.Match(hostNames) {
8+
return "Must not target forbidden.host.org"
9+
} else {
10+
// This will be reached even if hostNames is exactly "forbidden.host.org",
11+
// because the literal backspace is not matched
12+
return ""
13+
}
14+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package main
2+
3+
import "regexp"
4+
5+
func fixed(hostNames []byte) string {
6+
var hostRe = regexp.MustCompile(`\bforbidden.host.org`)
7+
if hostRe.Match(hostNames) {
8+
return "Must not target forbidden.host.org"
9+
} else {
10+
// hostNames definitely doesn't contain a word "forbidden.host.org", as "\\b"
11+
// is the start-of-word anchor, not a literal backspace.
12+
return ""
13+
}
14+
}

ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/test.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,20 @@
1-
package test
1+
package main
22

33
import "regexp"
44

5-
func test() {
5+
func main() {
6+
// many backslashes
7+
regexp.MustCompile("\a") // BAD
8+
regexp.MustCompile("\\a")
9+
regexp.MustCompile("\\\a") // BAD
10+
regexp.MustCompile("x\\\a") // BAD
11+
regexp.MustCompile("\\\\a")
12+
regexp.MustCompile("\\\\\a") // BAD
13+
regexp.MustCompile("\\\\\\a")
14+
regexp.MustCompile("\\\\\\\a") // BAD
15+
regexp.MustCompile("\\\\\\\\a")
16+
regexp.MustCompile("\\\\\\\\\a") // BAD
17+
regexp.MustCompile("\\\\\\\\\\a")
618

719
// BAD: probably a mistake:
820
regexp.MustCompile("hello\aworld")
@@ -20,5 +32,6 @@ func test() {
2032
regexp.MustCompile("hello\010world")
2133
regexp.MustCompile("hello\u0008world")
2234
regexp.MustCompile("hello\U00000008world")
23-
35+
// GOOD: use of a raw string literal
36+
regexp.MustCompile(`hello\b\sworld`)
2437
}

0 commit comments

Comments
 (0)