Skip to content

Commit 1e61cba

Browse files
Define Natvis for core regex types and add a framework for testing the visualizations to ensure they do not become stale.
1 parent fc6f5cc commit 1e61cba

File tree

6 files changed

+324
-0
lines changed

6 files changed

+324
-0
lines changed

.github/workflows/ci.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ jobs:
2727
- nightly
2828
- macos
2929
- win-msvc
30+
- win-msvc (nightly)
3031
- win-gnu
3132
include:
3233
- build: pinned
@@ -55,6 +56,9 @@ jobs:
5556
- build: win-msvc
5657
os: windows-latest
5758
rust: stable
59+
- build: win-msvc (nightly)
60+
os: windows-latest
61+
rust: nightly
5862
- build: win-gnu
5963
os: windows-latest
6064
rust: stable-x86_64-gnu
@@ -154,6 +158,13 @@ jobs:
154158
run: |
155159
cargo test --test default --no-default-features --features 'std pattern unicode-perl'
156160
161+
# The #[debugger_visualizer] attribute is currently gated behind an unstable feature flag.
162+
# In order to test the visualizers for the regex crate, they have to be tested on a nightly build.
163+
- if: matrix.build == 'win-msvc (nightly)'
164+
name: Run tests with debugger_visualizer feature
165+
run: |
166+
cargo test --test visualizers --features 'debugger_visualizer' -- --test-threads=1
167+
157168
rustfmt:
158169
name: rustfmt
159170
runs-on: ubuntu-18.04

Cargo.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ unstable = ["pattern"]
104104
# by default if the unstable feature is enabled.
105105
pattern = []
106106

107+
# Enable to embed regex.natvis via the #[debugger_visualizer] attribute (unstable; requires a nightly toolchain).
108+
debugger_visualizer = []
109+
107110
# For very fast prefix literal matching.
108111
[dependencies.aho-corasick]
109112
version = "0.7.18"
@@ -132,6 +135,9 @@ rand = { version = "0.8.3", default-features = false, features = ["getrandom", "
132135
# See: https://github.com/rust-lang/regex/issues/684
133136
# See: https://github.com/rust-lang/regex/issues/685
134137
# doc-comment = "0.3"
138+
# Used to test the debugger visualizers defined for the regex crate, such as regex.natvis.
139+
debugger_test = "0.1.0"
140+
debugger_test_parser = "0.1.0"
135141

136142
# Run the test suite on the default behavior of Regex::new.
137143
# This includes a mish mash of NFAs and DFAs, which are chosen automatically
@@ -184,6 +190,10 @@ name = "backtrack-bytes"
184190
path = "tests/test_crates_regex.rs"
185191
name = "crates-regex"
186192

193+
[[test]]
194+
path = "tests/test_visualizers.rs"
195+
name = "visualizers"
196+
187197
[profile.release]
188198
debug = true
189199

regex.natvis

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
3+
<!-- Summarizes a RegexBuilder by the first entry of __0.pats; expanding it inlines the children of the wrapped __0 value. -->
<Type Name="regex::re_builder::unicode::RegexBuilder">
4+
<DisplayString>{{ text={__0.pats[0]} }}</DisplayString>
5+
<Expand>
6+
<ExpandedItem>__0</ExpandedItem>
7+
</Expand>
8+
</Type>
9+
10+
<!-- Visualizer for bytes Captures: shows the searched text, the named groups, and one child per capture group that took part in the match, keyed by group index. -->
<Type Name="regex::re_bytes::Captures">
11+
<!-- discriminant(i): tag of slot i in locs; the loop below treats 0 as "slot not set". -->
<Intrinsic Name="discriminant" Expression="locs.__0.buf.ptr.pointer.pointer[i].discriminant">
12+
<Parameter Name="i" Type="int" />
13+
</Intrinsic>
14+
<!-- location(i): payload of slot i, used as an offset into text; only read when discriminant(i) != 0. -->
<Intrinsic Name="location" Expression="locs.__0.buf.ptr.pointer.pointer[i].variant1.__0">
15+
<Parameter Name="i" Type="int" />
16+
</Intrinsic>
17+
<!-- match_length(start, end): distance between two slots. NOTE(review): not referenced by any expression in this Type. -->
<Intrinsic Name="match_length" Expression="location(end)-location(start)">
18+
<Parameter Name="start" Type="int" />
19+
<Parameter Name="end" Type="int" />
20+
</Intrinsic>
21+
<DisplayString>{{ named_groups={named_groups.ptr.pointer->data.base.table.table.items} }}</DisplayString>
22+
<Expand>
23+
<Item Name="[text]">text</Item>
24+
<Item Name="[named_groups]">named_groups</Item>
25+
<CustomListItems>
26+
<!-- i walks locs two slots at a time (slot i is a group's start, slot i+1 its end); index is the group number. -->
<Variable Name="i" InitialValue="0" />
27+
<Variable Name="index" InitialValue="0" />
28+
<Variable Name="len" InitialValue="locs.__0.len" />
29+
<Loop>
30+
<!-- Only the end of locs terminates the loop. -->
<Break Condition="i &gt;= len" />
31+
<!-- A group whose start slot is unset is skipped instead of ending the list, so later groups that did match are still shown under their own index. -->
<If Condition="discriminant(i) != 0">
<Item Name="{index}">(char*)text.data_ptr+location(i),[location(i+1)-location(i)]s8</Item>
</If>
32+
<Exec>i+=2</Exec>
33+
<Exec>index++</Exec>
34+
</Loop>
35+
</CustomListItems>
36+
</Expand>
37+
</Type>
38+
39+
<!-- Shows a bytes Match as the slice of text between offsets start and end, formatted as a UTF-8 string (s8); start and end are listed in decimal (,d). -->
<Type Name="regex::re_bytes::Match">
40+
<DisplayString>{text.data_ptr+start,[end-start]s8}</DisplayString>
41+
<Expand>
42+
<Item Name="[text]">text</Item>
43+
<!-- Synthetic child: has no backing field, only the display string computed from text, start and end. -->
<Synthetic Name="[match_text]">
44+
<DisplayString>{(char*)text.data_ptr+start,[end-start]s8}</DisplayString>
45+
</Synthetic>
46+
<Item Name="[start]">start,d</Item>
47+
<Item Name="[end]">end,d</Item>
48+
</Expand>
49+
</Type>
50+
51+
<!-- Summarizes a bytes Regex by the first entry of the res list reached through __0.ro; expanding it inlines the children of __0.ro. -->
<Type Name="regex::re_bytes::Regex">
52+
<DisplayString>{{ text={__0.ro.ptr.pointer->data.res[0]} }}</DisplayString>
53+
<Expand>
54+
<ExpandedItem>__0.ro</ExpandedItem>
55+
</Expand>
56+
</Type>
57+
58+
<!-- Visualizer for unicode Captures: shows the searched text, the named groups, and one child per capture group that took part in the match, keyed by group index. -->
<Type Name="regex::re_unicode::Captures">
59+
<!-- discriminant(i): tag of slot i in locs; the loop below treats 0 as "slot not set". -->
<Intrinsic Name="discriminant" Expression="locs.__0.buf.ptr.pointer.pointer[i].discriminant">
60+
<Parameter Name="i" Type="int" />
61+
</Intrinsic>
62+
<!-- location(i): payload of slot i, used as an offset into text; only read when discriminant(i) != 0. -->
<Intrinsic Name="location" Expression="locs.__0.buf.ptr.pointer.pointer[i].variant1.__0">
63+
<Parameter Name="i" Type="int" />
64+
</Intrinsic>
65+
<!-- match_length(start, end): distance between two slots. NOTE(review): not referenced by any expression in this Type. -->
<Intrinsic Name="match_length" Expression="location(end)-location(start)">
66+
<Parameter Name="start" Type="int" />
67+
<Parameter Name="end" Type="int" />
68+
</Intrinsic>
69+
<DisplayString>{{ named_groups={named_groups.ptr.pointer->data.base.table.table.items} }}</DisplayString>
70+
<Expand>
71+
<Item Name="[text]">text</Item>
72+
<Item Name="[named_groups]">named_groups</Item>
73+
<CustomListItems>
74+
<!-- i walks locs two slots at a time (slot i is a group's start, slot i+1 its end); index is the group number. -->
<Variable Name="i" InitialValue="0" />
75+
<Variable Name="index" InitialValue="0" />
76+
<Variable Name="len" InitialValue="locs.__0.len" />
77+
<Loop>
78+
<!-- Only the end of locs terminates the loop. -->
<Break Condition="i &gt;= len" />
79+
<!-- A group whose start slot is unset is skipped instead of ending the list, so later groups that did match are still shown under their own index. -->
<If Condition="discriminant(i) != 0">
<Item Name="{index}">(char*)text.data_ptr+location(i),[location(i+1)-location(i)]s8</Item>
</If>
80+
<Exec>i+=2</Exec>
81+
<Exec>index++</Exec>
82+
</Loop>
83+
</CustomListItems>
84+
</Expand>
85+
</Type>
86+
87+
<!-- Shows a unicode Match as the slice of text between offsets start and end, formatted as a UTF-8 string (s8); start and end are listed in decimal (,d). -->
<Type Name="regex::re_unicode::Match">
88+
<DisplayString>{text.data_ptr+start,[end-start]s8}</DisplayString>
89+
<Expand>
90+
<Item Name="[text]">text</Item>
91+
<!-- Synthetic child: has no backing field, only the display string computed from text, start and end. -->
<Synthetic Name="[match_text]">
92+
<DisplayString>{(char*)text.data_ptr+start,[end-start]s8}</DisplayString>
93+
</Synthetic>
94+
<Item Name="[start]">start,d</Item>
95+
<Item Name="[end]">end,d</Item>
96+
</Expand>
97+
</Type>
98+
99+
<!-- Summarizes a unicode Regex by the first entry of the res list reached through __0.ro; expanding it inlines the children of __0.ro. -->
<Type Name="regex::re_unicode::Regex">
100+
<DisplayString>{{ text={__0.ro.ptr.pointer->data.res[0]} }}</DisplayString>
101+
<Expand>
102+
<ExpandedItem>__0.ro</ExpandedItem>
103+
</Expand>
104+
</Type>
105+
</AutoVisualizer>

src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,11 @@ another matching engine with fixed memory requirements.
607607

608608
#![deny(missing_docs)]
609609
#![cfg_attr(feature = "pattern", feature(pattern))]
610+
#![cfg_attr(feature = "debugger_visualizer", feature(debugger_visualizer))]
611+
#![cfg_attr(
612+
feature = "debugger_visualizer",
613+
debugger_visualizer(natvis_file = "../regex.natvis")
614+
)]
610615
#![warn(missing_debug_implementations)]
611616

612617
#[cfg(not(feature = "std"))]

tests/debugger_visualizer.rs

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
use debugger_test::debugger_test;
2+
3+
// Deliberately empty and never inlined, so every call to it stays a real call in the compiled test.
// NOTE(review): the tests call this on lines carrying a trailing break-marker comment; presumably
// debugger_test turns those markers into breakpoints — confirm against the debugger_test docs.
#[inline(never)]
4+
fn __break() {}
5+
6+
// Debugger test for the `regex::Regex` (unicode) visualizers in regex.natvis.
// The attribute names the debugger (cdb), the commands sent to it, and the output lines that
// must appear; lines starting with `pattern:` are written as regular expressions.
// NOTE(review): the expected `re` expansion hard-codes field offsets (`[+0xc00]`, `[+0x320]`, ...)
// of a crate-internal struct; any layout change will fail this test. Consider converting those
// lines to `pattern:` lines if that proves noisy.
#[debugger_test(
7+
debugger = "cdb",
8+
commands = r#"
9+
.nvlist
10+
dv
11+
dx re
12+
dx captures
13+
g
14+
dx m1
15+
dx m2
16+
dx m3
17+
dx m4
18+
"#,
19+
expected_statements = r#"
20+
re : { text="^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})$" } [Type: regex::re_unicode::Regex]
21+
[<Raw View>] [Type: regex::re_unicode::Regex]
22+
[Reference count] : 0x2 [Type: core::sync::atomic::AtomicUsize]
23+
[Weak reference count] : 0x1 [Type: core::sync::atomic::AtomicUsize]
24+
[+0xc00] res : { len=0x1 } [Type: alloc::vec::Vec<alloc::string::String,alloc::alloc::Global>]
25+
[+0x000] nfa [Type: regex::prog::Program]
26+
[+0x320] dfa [Type: regex::prog::Program]
27+
[+0x640] dfa_reverse [Type: regex::prog::Program]
28+
[+0x960] suffixes [Type: regex::literal::imp::LiteralSearcher]
29+
[+0xc18] ac : None [Type: enum$<core::option::Option<aho_corasick::ahocorasick::AhoCorasick<u32> >, 0, 1, Some>]
30+
[+0xda0] match_type : Dfa [Type: enum$<regex::exec::MatchType>]
31+
32+
captures : { named_groups=0x3 } [Type: regex::re_unicode::Captures]
33+
[<Raw View>] [Type: regex::re_unicode::Captures]
34+
[text] : "2020-10-15" [Type: str]
35+
pattern:\[named_groups\] : \{ len=0x3 \} \[Type: .*\]
36+
pattern:\[0\] : .* : "2020-10-15" \[Type: char \*\]
37+
pattern:\[1\] : .* : "2020" \[Type: char \*\]
38+
pattern:\[2\] : .* : "10" \[Type: char \*\]
39+
pattern:\[3\] : .* : "15" \[Type: char \*\]
40+
41+
m1 : "2020-10-15" [Type: regex::re_unicode::Match]
42+
[<Raw View>] [Type: regex::re_unicode::Match]
43+
[text] : "2020-10-15" [Type: str]
44+
[match_text] : "2020-10-15"
45+
[start] : 0 [Type: unsigned __int64]
46+
[end] : 10 [Type: unsigned __int64]
47+
48+
m2 : "2020" [Type: regex::re_unicode::Match]
49+
[<Raw View>] [Type: regex::re_unicode::Match]
50+
[text] : "2020-10-15" [Type: str]
51+
[match_text] : "2020"
52+
[start] : 0 [Type: unsigned __int64]
53+
[end] : 4 [Type: unsigned __int64]
54+
55+
m3 : "10" [Type: regex::re_unicode::Match]
56+
[<Raw View>] [Type: regex::re_unicode::Match]
57+
[text] : "2020-10-15" [Type: str]
58+
[match_text] : "10"
59+
[start] : 5 [Type: unsigned __int64]
60+
[end] : 7 [Type: unsigned __int64]
61+
62+
m4 : "15" [Type: regex::re_unicode::Match]
63+
[<Raw View>] [Type: regex::re_unicode::Match]
64+
[text] : "2020-10-15" [Type: str]
65+
[match_text] : "15"
66+
[start] : 8 [Type: unsigned __int64]
67+
[end] : 10 [Type: unsigned __int64]
68+
"#
69+
)]
70+
fn test_debugger_visualizer() {
71+
let re = regex::Regex::new(
72+
r"^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})$",
73+
)
74+
.unwrap();
75+
let text = "2020-10-15";
76+
77+
let captures = re.captures(text).unwrap();
78+
// `captures.iter()` yields one `Option<Match>` per group; `flatten` keeps the `Some` values.
let matches = captures
79+
.iter()
80+
.flatten()
81+
.collect::<Vec<regex::Match>>();
82+
// Whole match plus the three named groups.
assert_eq!(4, matches.len());
83+
__break(); // #break
84+
85+
let m1 = matches[0];
86+
assert_eq!("2020-10-15", m1.as_str());
87+
88+
let m2 = matches[1];
89+
assert_eq!("2020", m2.as_str());
90+
91+
let m3 = matches[2];
92+
assert_eq!("10", m3.as_str());
93+
94+
let m4 = matches[3];
95+
assert_eq!("15", m4.as_str());
96+
__break(); // #break
97+
}
98+
99+
// Debugger test for the `regex::bytes::Regex` visualizers in regex.natvis.
// The attribute names the debugger (cdb), the commands sent to it, and the output lines that
// must appear; lines starting with `pattern:` are written as regular expressions.
// NOTE(review): the expected `re` expansion hard-codes field offsets (`[+0xc00]`, `[+0x320]`, ...)
// of a crate-internal struct; any layout change will fail this test. Consider converting those
// lines to `pattern:` lines if that proves noisy.
#[debugger_test(
100+
debugger = "cdb",
101+
commands = r#"
102+
.nvlist
103+
dv
104+
dx re
105+
dx captures
106+
g
107+
dx m1
108+
dx m2
109+
dx m3
110+
dx m4
111+
"#,
112+
expected_statements = r#"
113+
re : { text="^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})$" } [Type: regex::re_bytes::Regex]
114+
[<Raw View>] [Type: regex::re_bytes::Regex]
115+
[Reference count] : 0x2 [Type: core::sync::atomic::AtomicUsize]
116+
[Weak reference count] : 0x1 [Type: core::sync::atomic::AtomicUsize]
117+
[+0xc00] res : { len=0x1 } [Type: alloc::vec::Vec<alloc::string::String,alloc::alloc::Global>]
118+
[+0x000] nfa [Type: regex::prog::Program]
119+
[+0x320] dfa [Type: regex::prog::Program]
120+
[+0x640] dfa_reverse [Type: regex::prog::Program]
121+
[+0x960] suffixes [Type: regex::literal::imp::LiteralSearcher]
122+
[+0xc18] ac : None [Type: enum$<core::option::Option<aho_corasick::ahocorasick::AhoCorasick<u32> >, 0, 1, Some>]
123+
[+0xda0] match_type : Dfa [Type: enum$<regex::exec::MatchType>]
124+
125+
captures : { named_groups=0x3 } [Type: regex::re_bytes::Captures]
126+
[<Raw View>] [Type: regex::re_bytes::Captures]
127+
[text] : { len=0xa } [Type: slice$<u8>]
128+
pattern:\[named_groups\] : \{ len=0x3 \} \[Type: .*\]
129+
pattern:\[0\] : .* : "2020-10-15" \[Type: char \*\]
130+
pattern:\[1\] : .* : "2020" \[Type: char \*\]
131+
pattern:\[2\] : .* : "10" \[Type: char \*\]
132+
pattern:\[3\] : .* : "15" \[Type: char \*\]
133+
134+
m1 : "2020-10-15" [Type: regex::re_bytes::Match]
135+
[<Raw View>] [Type: regex::re_bytes::Match]
136+
[text] : { len=0xa } [Type: slice$<u8>]
137+
[match_text] : "2020-10-15"
138+
[start] : 0 [Type: unsigned __int64]
139+
[end] : 10 [Type: unsigned __int64]
140+
141+
m2 : "2020" [Type: regex::re_bytes::Match]
142+
[<Raw View>] [Type: regex::re_bytes::Match]
143+
[text] : { len=0xa } [Type: slice$<u8>]
144+
[match_text] : "2020"
145+
[start] : 0 [Type: unsigned __int64]
146+
[end] : 4 [Type: unsigned __int64]
147+
148+
m3 : "10" [Type: regex::re_bytes::Match]
149+
[<Raw View>] [Type: regex::re_bytes::Match]
150+
[text] : { len=0xa } [Type: slice$<u8>]
151+
[match_text] : "10"
152+
[start] : 5 [Type: unsigned __int64]
153+
[end] : 7 [Type: unsigned __int64]
154+
155+
m4 : "15" [Type: regex::re_bytes::Match]
156+
[<Raw View>] [Type: regex::re_bytes::Match]
157+
[text] : { len=0xa } [Type: slice$<u8>]
158+
[match_text] : "15"
159+
[start] : 8 [Type: unsigned __int64]
160+
[end] : 10 [Type: unsigned __int64]
161+
"#
162+
)]
163+
fn test_bytes_debugger_visualizer() {
164+
let re = regex::bytes::Regex::new(
165+
r"^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})$",
166+
)
167+
.unwrap();
168+
let text = b"2020-10-15";
169+
170+
let captures = re.captures(text).unwrap();
171+
// `captures.iter()` yields one `Option<Match>` per group; `flatten` keeps the `Some` values.
let matches = captures
172+
.iter()
173+
.flatten()
174+
.collect::<Vec<regex::bytes::Match>>();
175+
// Whole match plus the three named groups.
assert_eq!(4, matches.len());
176+
__break(); // #break
177+
178+
let m1 = matches[0];
179+
assert_eq!(b"2020-10-15", m1.as_bytes());
180+
181+
let m2 = matches[1];
182+
assert_eq!(b"2020", m2.as_bytes());
183+
184+
let m3 = matches[2];
185+
assert_eq!(b"10", m3.as_bytes());
186+
187+
let m4 = matches[3];
188+
assert_eq!(b"15", m4.as_bytes());
189+
190+
__break(); // #break
191+
}

tests/test_visualizers.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
// Pull in the debugger visualizer tests only when the `debugger_visualizer` feature is enabled;
// without the feature this test target compiles to an empty crate.
#[cfg(feature = "debugger_visualizer")]
2+
mod debugger_visualizer;

0 commit comments

Comments
 (0)