Skip to content

Commit 5bc1489

Browse files
Correctly handle unicode characters and tags being open just before the end of the doc comment
1 parent 30cabfd commit 5bc1489

File tree

3 files changed

+72
-29
lines changed

3 files changed

+72
-29
lines changed

src/librustdoc/passes/html_tags.rs

+42-16
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,22 @@ fn drop_tag(
4545
range: Range<usize>,
4646
f: &impl Fn(&str, &Range<usize>),
4747
) {
48-
if let Some(pos) = tags.iter().rev().position(|(t, _)| *t == tag_name) {
48+
let tag_name_low = tag_name.to_lowercase();
49+
if let Some(pos) = tags.iter().rev().position(|(t, _)| t.to_lowercase() == tag_name_low) {
4950
// Because this is from a `rev` iterator, the position is reversed as well!
5051
let pos = tags.len() - 1 - pos;
51-
// If the tag is nested inside a "<script>", not warning should be emitted.
52-
let should_not_warn =
53-
tags.iter().take(pos + 1).any(|(at, _)| at == "script" || at == "style");
52+
// If the tag is nested inside a "<script>" or a "<style>" tag, no warning should
53+
// be emitted.
54+
let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| {
55+
let at = at.to_lowercase();
56+
at == "script" || at == "style"
57+
});
5458
for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) {
55-
if should_not_warn || ALLOWED_UNCLOSED.iter().any(|&at| at == &last_tag_name) {
59+
if should_not_warn {
60+
continue;
61+
}
62+
let last_tag_name_low = last_tag_name.to_lowercase();
63+
if ALLOWED_UNCLOSED.iter().any(|&at| at == &last_tag_name_low) {
5664
continue;
5765
}
5866
// `tags` is used as a queue, meaning that everything after `pos` is included inside it.
@@ -77,21 +85,29 @@ fn extract_tag(
7785
) {
7886
let mut iter = text.chars().enumerate().peekable();
7987

80-
while let Some((start_pos, c)) = iter.next() {
88+
'top: while let Some((start_pos, c)) = iter.next() {
8189
if c == '<' {
8290
let mut tag_name = String::new();
8391
let mut is_closing = false;
84-
while let Some((pos, c)) = iter.peek() {
92+
let mut prev_pos = start_pos;
93+
loop {
94+
let (pos, c) = match iter.peek() {
95+
Some((pos, c)) => (*pos, *c),
96+
// In case we reached the end of the doc comment, we want to check that it's an
97+
// unclosed HTML tag. For example "/// <h3".
98+
None => (prev_pos, '\0'),
99+
};
100+
prev_pos = pos;
85101
// Checking if this is a closing tag (like `</a>` for `<a>`).
86-
if *c == '/' && tag_name.is_empty() {
102+
if c == '/' && tag_name.is_empty() {
87103
is_closing = true;
88-
} else if c.is_ascii_alphanumeric() && !c.is_ascii_uppercase() {
89-
tag_name.push(*c);
104+
} else if c.is_ascii_alphanumeric() {
105+
tag_name.push(c);
90106
} else {
91107
if !tag_name.is_empty() {
92108
let mut r =
93109
Range { start: range.start + start_pos, end: range.start + pos };
94-
if *c == '>' {
110+
if c == '>' {
95111
// In case we have a tag without attribute, we can consider the span to
96112
// refer to it fully.
97113
r.end += 1;
@@ -102,11 +118,20 @@ fn extract_tag(
102118
tags.push((tag_name, r));
103119
}
104120
}
105-
break;
121+
continue 'top;
122+
}
123+
// Some chars like 💩 are longer than 1 byte, so we need to skip the other
124+
// bytes as well to prevent stopping "in the middle" of a char.
125+
for _ in 0..c.len_utf8() {
126+
iter.next();
106127
}
107-
iter.next();
108128
}
109129
}
130+
// Some chars like 💩 are longer than 1 byte, so we need to skip the other
131+
// bytes as well to prevent stopping "in the middle" of a char.
132+
for _ in 0..c.len_utf8() - 1 {
133+
iter.next();
134+
}
110135
}
111136
}
112137

@@ -143,9 +168,10 @@ impl<'a, 'tcx> DocFolder for InvalidHtmlTagsLinter<'a, 'tcx> {
143168
}
144169
}
145170

146-
for (tag, range) in
147-
tags.iter().filter(|(t, _)| ALLOWED_UNCLOSED.iter().find(|&at| at == t).is_none())
148-
{
171+
for (tag, range) in tags.iter().filter(|(t, _)| {
172+
let t = t.to_lowercase();
173+
ALLOWED_UNCLOSED.iter().find(|&&at| at == t).is_none()
174+
}) {
149175
report_diag(&format!("unclosed HTML tag `{}`", tag), range);
150176
}
151177
}

src/test/rustdoc-ui/invalid-html-tags.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#![deny(invalid_html_tags)]
22

3+
//! <p>💩<p>
4+
//~^ ERROR unclosed HTML tag `p`
5+
36
/// <img><input>
47
/// <script>
58
/// <img><input>
@@ -38,6 +41,8 @@ pub fn b() {}
3841
//~^ ERROR unclosed HTML tag `div`
3942
/// <h3>
4043
//~^ ERROR unclosed HTML tag `h3`
44+
/// <script
45+
//~^ ERROR unclosed HTML tag `script`
4146
pub fn c() {}
4247

4348
// Unclosed tags shouldn't warn if they are nested inside a <script> elem.
@@ -55,7 +60,7 @@ pub fn d() {}
5560
/// <style>
5661
/// <h3><div>
5762
/// </style>
58-
/// <style>
63+
/// <stYle>
5964
/// <div>
6065
/// <p>
6166
/// </div>
+24-12
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,68 @@
1-
error: unclosed HTML tag `unknown`
2-
--> $DIR/invalid-html-tags.rs:7:5
1+
error: unclosed HTML tag `p`
2+
--> $DIR/invalid-html-tags.rs:3:5
33
|
4-
LL | /// <unknown>
5-
| ^^^^^^^^^
4+
LL | //! <p>💩<p>
5+
| ^^^
66
|
77
note: the lint level is defined here
88
--> $DIR/invalid-html-tags.rs:1:9
99
|
1010
LL | #![deny(invalid_html_tags)]
1111
| ^^^^^^^^^^^^^^^^^
1212

13-
error: unclosed HTML tag `script`
13+
error: unclosed HTML tag `unknown`
1414
--> $DIR/invalid-html-tags.rs:10:5
1515
|
16+
LL | /// <unknown>
17+
| ^^^^^^^^^
18+
19+
error: unclosed HTML tag `script`
20+
--> $DIR/invalid-html-tags.rs:13:5
21+
|
1622
LL | /// <script>
1723
| ^^^^^^^^
1824

1925
error: unclosed HTML tag `h2`
20-
--> $DIR/invalid-html-tags.rs:15:7
26+
--> $DIR/invalid-html-tags.rs:18:7
2127
|
2228
LL | /// <h2>
2329
| ^^^^
2430

2531
error: unclosed HTML tag `h3`
26-
--> $DIR/invalid-html-tags.rs:17:9
32+
--> $DIR/invalid-html-tags.rs:20:9
2733
|
2834
LL | /// <h3>
2935
| ^^^^
3036

3137
error: unopened HTML tag `hello`
32-
--> $DIR/invalid-html-tags.rs:20:5
38+
--> $DIR/invalid-html-tags.rs:23:5
3339
|
3440
LL | /// </hello>
3541
| ^^^^^^^^
3642

3743
error: unclosed HTML tag `p`
38-
--> $DIR/invalid-html-tags.rs:25:14
44+
--> $DIR/invalid-html-tags.rs:28:14
3945
|
4046
LL | /// <br/> <p>
4147
| ^^^
4248

4349
error: unclosed HTML tag `div`
44-
--> $DIR/invalid-html-tags.rs:37:5
50+
--> $DIR/invalid-html-tags.rs:40:5
4551
|
4652
LL | /// <div style="hello">
4753
| ^^^^
4854

4955
error: unclosed HTML tag `h3`
50-
--> $DIR/invalid-html-tags.rs:39:7
56+
--> $DIR/invalid-html-tags.rs:42:7
5157
|
5258
LL | /// <h3>
5359
| ^^^^
5460

55-
error: aborting due to 8 previous errors
61+
error: unclosed HTML tag `script`
62+
--> $DIR/invalid-html-tags.rs:44:5
63+
|
64+
LL | /// <script
65+
| ^^^^^^
66+
67+
error: aborting due to 10 previous errors
5668

0 commit comments

Comments
 (0)