Skip to content

Commit 9408c68

Browse files
committed
Auto merge of #6105 - bugadani:sus-char, r=ebroto
Lint for invisible Unicode characters other than ZWSP This PR extends the existing `zero_width_space` lint to look for other invisible characters as well (in this case, the `\\u{ad}` soft hyphen). I feel like this lint is the logical place to add the check, although I realize the lint name is not particularly flexible, and I also understand that it shouldn't be renamed for compatibility reasons. Open questions: - What other characters should trigger the lint? - What should be done with the lint name? - How to indicate the change in functionality? Motivation behind this PR: rust-lang/rust#77417 - I managed to shoot myself in the foot with an invisible character pasted into my test case. changelog: rename [`zero_width_space`] to [`invisible_characters`] and add SHY and WJ to the list.
2 parents a1a7f20 + 572e4c4 commit 9408c68

File tree

6 files changed

+47
-27
lines changed

6 files changed

+47
-27
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,7 @@ Released 2018-09-13
16471647
[`invalid_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_ref
16481648
[`invalid_regex`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_regex
16491649
[`invalid_upcast_comparisons`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_upcast_comparisons
1650+
[`invisible_characters`]: https://rust-lang.github.io/rust-clippy/master/index.html#invisible_characters
16501651
[`items_after_statements`]: https://rust-lang.github.io/rust-clippy/master/index.html#items_after_statements
16511652
[`iter_cloned_collect`]: https://rust-lang.github.io/rust-clippy/master/index.html#iter_cloned_collect
16521653
[`iter_next_loop`]: https://rust-lang.github.io/rust-clippy/master/index.html#iter_next_loop
@@ -1922,6 +1923,5 @@ Released 2018-09-13
19221923
[`zero_divided_by_zero`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_divided_by_zero
19231924
[`zero_prefixed_literal`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_prefixed_literal
19241925
[`zero_ptr`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_ptr
1925-
[`zero_width_space`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_width_space
19261926
[`zst_offset`]: https://rust-lang.github.io/rust-clippy/master/index.html#zst_offset
19271927
<!-- end autogenerated links to lint list -->

clippy_lints/src/lib.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -854,9 +854,9 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
854854
&types::UNIT_CMP,
855855
&types::UNNECESSARY_CAST,
856856
&types::VEC_BOX,
857+
&unicode::INVISIBLE_CHARACTERS,
857858
&unicode::NON_ASCII_LITERAL,
858859
&unicode::UNICODE_NOT_NFC,
859-
&unicode::ZERO_WIDTH_SPACE,
860860
&unit_return_expecting_ord::UNIT_RETURN_EXPECTING_ORD,
861861
&unnamed_address::FN_ADDRESS_COMPARISONS,
862862
&unnamed_address::VTABLE_ADDRESS_COMPARISONS,
@@ -1511,7 +1511,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
15111511
LintId::of(&types::UNIT_CMP),
15121512
LintId::of(&types::UNNECESSARY_CAST),
15131513
LintId::of(&types::VEC_BOX),
1514-
LintId::of(&unicode::ZERO_WIDTH_SPACE),
1514+
LintId::of(&unicode::INVISIBLE_CHARACTERS),
15151515
LintId::of(&unit_return_expecting_ord::UNIT_RETURN_EXPECTING_ORD),
15161516
LintId::of(&unnamed_address::FN_ADDRESS_COMPARISONS),
15171517
LintId::of(&unnamed_address::VTABLE_ADDRESS_COMPARISONS),
@@ -1779,7 +1779,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
17791779
LintId::of(&types::ABSURD_EXTREME_COMPARISONS),
17801780
LintId::of(&types::CAST_REF_TO_MUT),
17811781
LintId::of(&types::UNIT_CMP),
1782-
LintId::of(&unicode::ZERO_WIDTH_SPACE),
1782+
LintId::of(&unicode::INVISIBLE_CHARACTERS),
17831783
LintId::of(&unit_return_expecting_ord::UNIT_RETURN_EXPECTING_ORD),
17841784
LintId::of(&unnamed_address::FN_ADDRESS_COMPARISONS),
17851785
LintId::of(&unnamed_address::VTABLE_ADDRESS_COMPARISONS),
@@ -1910,6 +1910,7 @@ pub fn register_renamed(ls: &mut rustc_lint::LintStore) {
19101910
ls.register_renamed("clippy::for_loop_over_option", "clippy::for_loops_over_fallibles");
19111911
ls.register_renamed("clippy::for_loop_over_result", "clippy::for_loops_over_fallibles");
19121912
ls.register_renamed("clippy::identity_conversion", "clippy::useless_conversion");
1913+
ls.register_renamed("clippy::zero_width_space", "clippy::invisible_characters");
19131914
}
19141915

19151916
// only exists to let the dogfood integration test work.

clippy_lints/src/unicode.rs

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,18 @@ use rustc_span::source_map::Span;
88
use unicode_normalization::UnicodeNormalization;
99

1010
declare_clippy_lint! {
11-
/// **What it does:** Checks for the Unicode zero-width space in the code.
11+
/// **What it does:** Checks for invisible Unicode characters in the code.
1212
///
1313
/// **Why is this bad?** Having an invisible character in the code makes for all
1414
/// sorts of April fools, but otherwise is very much frowned upon.
1515
///
1616
/// **Known problems:** None.
1717
///
18-
/// **Example:** You don't see it, but there may be a zero-width space
19-
/// somewhere in this text.
20-
pub ZERO_WIDTH_SPACE,
18+
/// **Example:** You don't see it, but there may be a zero-width space or soft hyphen
19+
/// some­where in this text.
20+
pub INVISIBLE_CHARACTERS,
2121
correctness,
22-
"using a zero-width space in a string literal, which is confusing"
22+
"using an invisible character in a string literal, which is confusing"
2323
}
2424

2525
declare_clippy_lint! {
@@ -63,7 +63,7 @@ declare_clippy_lint! {
6363
"using a Unicode literal not in NFC normal form (see [Unicode tr15](http://www.unicode.org/reports/tr15/) for further information)"
6464
}
6565

66-
declare_lint_pass!(Unicode => [ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC]);
66+
declare_lint_pass!(Unicode => [INVISIBLE_CHARACTERS, NON_ASCII_LITERAL, UNICODE_NOT_NFC]);
6767

6868
impl LateLintPass<'_> for Unicode {
6969
fn check_expr(&mut self, cx: &LateContext<'_>, expr: &'_ Expr<'_>) {
@@ -91,14 +91,17 @@ fn escape<T: Iterator<Item = char>>(s: T) -> String {
9191

9292
fn check_str(cx: &LateContext<'_>, span: Span, id: HirId) {
9393
let string = snippet(cx, span, "");
94-
if string.contains('\u{200B}') {
94+
if string.chars().any(|c| ['\u{200B}', '\u{ad}', '\u{2060}'].contains(&c)) {
9595
span_lint_and_sugg(
9696
cx,
97-
ZERO_WIDTH_SPACE,
97+
INVISIBLE_CHARACTERS,
9898
span,
99-
"zero-width space detected",
99+
"invisible character detected",
100100
"consider replacing the string with",
101-
string.replace("\u{200B}", "\\u{200B}"),
101+
string
102+
.replace("\u{200B}", "\\u{200B}")
103+
.replace("\u{ad}", "\\u{AD}")
104+
.replace("\u{2060}", "\\u{2060}"),
102105
Applicability::MachineApplicable,
103106
);
104107
}

src/lintlist/mod.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,13 @@ pub static ref ALL_LINTS: Vec<Lint> = vec![
969969
deprecation: None,
970970
module: "types",
971971
},
972+
Lint {
973+
name: "invisible_characters",
974+
group: "correctness",
975+
desc: "using an invisible character in a string literal, which is confusing",
976+
deprecation: None,
977+
module: "unicode",
978+
},
972979
Lint {
973980
name: "items_after_statements",
974981
group: "pedantic",
@@ -2810,13 +2817,6 @@ pub static ref ALL_LINTS: Vec<Lint> = vec![
28102817
deprecation: None,
28112818
module: "misc",
28122819
},
2813-
Lint {
2814-
name: "zero_width_space",
2815-
group: "correctness",
2816-
desc: "using a zero-width space in a string literal, which is confusing",
2817-
deprecation: None,
2818-
module: "unicode",
2819-
},
28202820
Lint {
28212821
name: "zst_offset",
28222822
group: "correctness",

tests/ui/unicode.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1-
#[warn(clippy::zero_width_space)]
1+
#[warn(clippy::invisible_characters)]
22
fn zero() {
33
print!("Here >​< is a ZWS, and ​another");
44
print!("This\u{200B}is\u{200B}fine");
5+
print!("Here >­< is a SHY, and ­another");
6+
print!("This\u{ad}is\u{ad}fine");
7+
print!("Here >⁠< is a WJ, and ⁠another");
8+
print!("This\u{2060}is\u{2060}fine");
59
}
610

711
#[warn(clippy::unicode_not_nfc)]

tests/ui/unicode.stderr

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,38 @@
1-
error: zero-width space detected
1+
error: invisible character detected
22
--> $DIR/unicode.rs:3:12
33
|
44
LL | print!("Here >​< is a ZWS, and ​another");
55
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{200B}< is a ZWS, and /u{200B}another"`
66
|
7-
= note: `-D clippy::zero-width-space` implied by `-D warnings`
7+
= note: `-D clippy::invisible-characters` implied by `-D warnings`
8+
9+
error: invisible character detected
10+
--> $DIR/unicode.rs:5:12
11+
|
12+
LL | print!("Here >­< is a SHY, and ­another");
13+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{AD}< is a SHY, and /u{AD}another"`
14+
15+
error: invisible character detected
16+
--> $DIR/unicode.rs:7:12
17+
|
18+
LL | print!("Here >⁠< is a WJ, and ⁠another");
19+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{2060}< is a WJ, and /u{2060}another"`
820

921
error: non-NFC Unicode sequence detected
10-
--> $DIR/unicode.rs:9:12
22+
--> $DIR/unicode.rs:13:12
1123
|
1224
LL | print!("̀àh?");
1325
| ^^^^^ help: consider replacing the string with: `"̀àh?"`
1426
|
1527
= note: `-D clippy::unicode-not-nfc` implied by `-D warnings`
1628

1729
error: literal non-ASCII character detected
18-
--> $DIR/unicode.rs:15:12
30+
--> $DIR/unicode.rs:19:12
1931
|
2032
LL | print!("Üben!");
2133
| ^^^^^^^ help: consider replacing the string with: `"/u{dc}ben!"`
2234
|
2335
= note: `-D clippy::non-ascii-literal` implied by `-D warnings`
2436

25-
error: aborting due to 3 previous errors
37+
error: aborting due to 5 previous errors
2638

0 commit comments

Comments
 (0)