Skip to content

Commit 7f8846a

Browse files
committed
Uplift clippy::invalid_utf8_in_unchecked as invalid_from_utf8_unchecked
1 parent 1a5f8bc commit 7f8846a

File tree

8 files changed

+211
-0
lines changed

8 files changed

+211
-0
lines changed

compiler/rustc_lint/messages.ftl

+4
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re
304304
lint_improper_ctypes_union_layout_reason = this union has unspecified layout
305305
lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive
306306
307+
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
308+
lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior
309+
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
310+
307311
lint_lintpass_by_hand = implementing `LintPass` by hand
308312
.help = try using `declare_lint_pass!` or `impl_lint_pass!` instead
309313
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
use std::str::Utf8Error;
2+
3+
use rustc_ast::{BorrowKind, LitKind};
4+
use rustc_hir::{Expr, ExprKind};
5+
use rustc_span::source_map::Spanned;
6+
use rustc_span::sym;
7+
8+
use crate::lints::InvalidFromUtf8UncheckedDiag;
9+
use crate::{LateContext, LateLintPass, LintContext};
10+
11+
declare_lint! {
12+
/// The `invalid_from_utf8_unchecked` lint checks for calls to
13+
/// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
14+
/// with an invalid UTF-8 literal.
15+
///
16+
/// ### Example
17+
///
18+
/// ```rust,compile_fail
19+
/// # #[allow(unused)]
20+
/// unsafe {
21+
/// std::str::from_utf8_unchecked(b"Ru\x82st");
22+
/// }
23+
/// ```
24+
///
25+
/// {{produces}}
26+
///
27+
/// ### Explanation
28+
///
29+
/// Creating such a `str` would result in undefined behavior as per documentation
30+
/// for `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`.
31+
pub INVALID_FROM_UTF8_UNCHECKED,
32+
Deny,
33+
"using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
34+
}
35+
36+
declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED]);
37+
38+
// Late lint pass behind `INVALID_FROM_UTF8_UNCHECKED`: flags calls to the functions tagged with
// the `str_from_utf8_unchecked` / `str_from_utf8_unchecked_mut` diagnostic items whose single
// argument is a literal byte sequence that fails UTF-8 validation.
impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
39+
// Inspects one expression. Only a call with exactly one argument, whose callee path resolves
// to a definition carrying one of the two diagnostic names above, is examined further.
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
40+
if let ExprKind::Call(path, [arg]) = expr.kind
41+
&& let ExprKind::Path(ref qpath) = path.kind
42+
&& let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
43+
&& let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
44+
&& [sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
45+
{
46+
// Shared reporting path for both argument shapes handled below: the lint is attached to
// the whole call expression and the label to the argument.
let lint = |utf8_error: Utf8Error| {
47+
// Both accepted diagnostic names start with `str_`, so stripping the prefix cannot fail.
let method = diag_item.as_str().strip_prefix("str_").unwrap();
48+
cx.emit_spanned_lint(INVALID_FROM_UTF8_UNCHECKED, expr.span, InvalidFromUtf8UncheckedDiag {
49+
method: format!("std::str::{method}"),
50+
valid_up_to: utf8_error.valid_up_to(),
51+
label: arg.span,
52+
})
53+
};
54+
55+
match &arg.kind {
56+
// Case 1: the argument is a literal; only byte-string literals (`b"..."`) are validated.
ExprKind::Lit(Spanned { node: lit, .. }) => {
57+
if let LitKind::ByteStr(bytes, _) = &lit
58+
&& let Err(utf8_error) = std::str::from_utf8(bytes)
59+
{
60+
lint(utf8_error);
61+
}
62+
},
63+
// Case 2: the argument is a borrow of an array expression. The mutability field is
// wildcarded, so it is not constrained by this pattern.
ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
64+
// Collect the array into bytes. Any element that is not a byte or integer literal
// yields `None`, which makes the whole `collect` produce `None` and skips the check.
let elements = args.iter().map(|e|{
65+
match &e.kind {
66+
ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
67+
LitKind::Byte(b) => Some(*b),
68+
// `as u8` keeps only the low 8 bits of the integer literal's value.
LitKind::Int(b, _) => Some(*b as u8),
69+
_ => None
70+
}
71+
_ => None
72+
}
73+
}).collect::<Option<Vec<_>>>();
74+
75+
if let Some(elements) = elements
76+
&& let Err(utf8_error) = std::str::from_utf8(&elements)
77+
{
78+
lint(utf8_error);
79+
}
80+
}
81+
// Any other argument shape (variables, calls, ...) is not analysed.
_ => {}
82+
}
83+
}
84+
}
85+
}

compiler/rustc_lint/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ mod expect;
6060
mod for_loops_over_fallibles;
6161
pub mod hidden_unicode_codepoints;
6262
mod internal;
63+
mod invalid_from_utf8;
6364
mod late;
6465
mod let_underscore;
6566
mod levels;
@@ -102,6 +103,7 @@ use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
102103
use for_loops_over_fallibles::*;
103104
use hidden_unicode_codepoints::*;
104105
use internal::*;
106+
use invalid_from_utf8::*;
105107
use let_underscore::*;
106108
use map_unit_fn::*;
107109
use methods::*;
@@ -207,6 +209,7 @@ late_lint_methods!(
207209
HardwiredLints: HardwiredLints,
208210
ImproperCTypesDeclarations: ImproperCTypesDeclarations,
209211
ImproperCTypesDefinitions: ImproperCTypesDefinitions,
212+
InvalidFromUtf8: InvalidFromUtf8,
210213
VariantSizeDifferences: VariantSizeDifferences,
211214
BoxPointers: BoxPointers,
212215
PathStatements: PathStatements,

compiler/rustc_lint/src/lints.rs

+10
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,16 @@ pub struct ForgetCopyDiag<'a> {
699699
pub label: Span,
700700
}
701701

702+
// invalid_from_utf8.rs
703+
// Diagnostic payload for the `lint_invalid_from_utf8_unchecked` Fluent message, emitted by the
// lint pass in invalid_from_utf8.rs.
#[derive(LintDiagnostic)]
704+
#[diag(lint_invalid_from_utf8_unchecked)]
705+
pub struct InvalidFromUtf8UncheckedDiag {
706+
// Function path shown in the message; the lint pass fills it with `std::str::<function name>`.
pub method: String,
707+
// Taken from `Utf8Error::valid_up_to()` by the lint pass; interpolated into the label text.
pub valid_up_to: usize,
708+
// Span the label is attached to; the lint pass passes the span of the call's argument.
#[label]
709+
pub label: Span,
710+
}
711+
702712
// hidden_unicode_codepoints.rs
703713
#[derive(LintDiagnostic)]
704714
#[diag(lint_hidden_unicode_codepoints)]

compiler/rustc_span/src/symbol.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,8 @@ symbols! {
14541454
stop_after_dataflow,
14551455
store,
14561456
str,
1457+
str_from_utf8_unchecked,
1458+
str_from_utf8_unchecked_mut,
14571459
str_split_whitespace,
14581460
str_trim,
14591461
str_trim_end,

library/core/src/str/converts.rs

+2
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
167167
#[must_use]
168168
#[stable(feature = "rust1", since = "1.0.0")]
169169
#[rustc_const_stable(feature = "const_str_from_utf8_unchecked", since = "1.55.0")]
170+
#[rustc_diagnostic_item = "str_from_utf8_unchecked"]
170171
pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
171172
// SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8.
172173
// Also relies on `&str` and `&[u8]` having the same layout.
@@ -194,6 +195,7 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
194195
#[must_use]
195196
#[stable(feature = "str_mut_extras", since = "1.20.0")]
196197
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked_mut", issue = "91005")]
198+
#[rustc_diagnostic_item = "str_from_utf8_unchecked_mut"]
197199
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
198200
// SAFETY: the caller must guarantee that the bytes `v`
199201
// are valid UTF-8, thus the cast to `*mut str` is safe.

tests/ui/lint/invalid_from_utf8.rs

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// check-pass
2+
3+
#![feature(concat_bytes)]
4+
#![warn(invalid_from_utf8_unchecked)]
5+
6+
pub fn from_utf8_unchecked_mut() { // exercises the lint on `&mut [..]` array arguments
7+
// Valid: none of the calls in this block carries a WARN annotation
8+
unsafe {
9+
std::str::from_utf8_unchecked_mut(&mut [99, 108, 105, 112, 112, 121]);
10+
std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'i', b'p', b'p', b'y']);
11+
12+
let x = 0xA0;
13+
std::str::from_utf8_unchecked_mut(&mut [0xC0, x]); // `x` is a variable, not a literal element: no warning expected
14+
}
15+
16+
// Invalid: byte 130 (0x82) follows "cl", so every call below is expected to warn
17+
unsafe {
18+
std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
19+
//~^ WARN calls to `std::str::from_utf8_unchecked_mut`
20+
std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
21+
//~^ WARN calls to `std::str::from_utf8_unchecked_mut`
22+
}
23+
}
24+
25+
pub fn from_utf8_unchecked() { // exercises the lint on `&[..]`, `b"..."` and `concat_bytes!` arguments
26+
// Valid: none of the calls in this block carries a WARN annotation
27+
unsafe {
28+
std::str::from_utf8_unchecked(&[99, 108, 105, 112, 112, 121]);
29+
std::str::from_utf8_unchecked(&[b'c', b'l', b'i', b'p', b'p', b'y']);
30+
std::str::from_utf8_unchecked(b"clippy");
31+
32+
let x = 0xA0;
33+
std::str::from_utf8_unchecked(&[0xC0, x]); // `x` is a variable, not a literal element: no warning expected
34+
}
35+
36+
// Invalid: byte 130 (0x82) follows "cl", so every call below is expected to warn
37+
unsafe {
38+
std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
39+
//~^ WARN calls to `std::str::from_utf8_unchecked`
40+
std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
41+
//~^ WARN calls to `std::str::from_utf8_unchecked`
42+
std::str::from_utf8_unchecked(b"cl\x82ippy");
43+
//~^ WARN calls to `std::str::from_utf8_unchecked`
44+
std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
45+
//~^ WARN calls to `std::str::from_utf8_unchecked`
46+
}
47+
}
48+
49+
fn main() {} // intentionally empty; the file is marked `check-pass`, so presumably only the diagnostics are checked
+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
2+
--> $DIR/invalid_from_utf8.rs:18:9
3+
|
4+
LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
5+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
6+
| |
7+
| the literal was valid UTF-8 up to the 2 bytes
8+
|
9+
note: the lint level is defined here
10+
--> $DIR/invalid_from_utf8.rs:4:9
11+
|
12+
LL | #![warn(invalid_from_utf8_unchecked)]
13+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
14+
15+
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
16+
--> $DIR/invalid_from_utf8.rs:20:9
17+
|
18+
LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
19+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
20+
| |
21+
| the literal was valid UTF-8 up to the 2 bytes
22+
23+
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
24+
--> $DIR/invalid_from_utf8.rs:38:9
25+
|
26+
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
27+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
28+
| |
29+
| the literal was valid UTF-8 up to the 2 bytes
30+
31+
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
32+
--> $DIR/invalid_from_utf8.rs:40:9
33+
|
34+
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
35+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
36+
| |
37+
| the literal was valid UTF-8 up to the 2 bytes
38+
39+
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
40+
--> $DIR/invalid_from_utf8.rs:42:9
41+
|
42+
LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
43+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
44+
| |
45+
| the literal was valid UTF-8 up to the 2 bytes
46+
47+
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
48+
--> $DIR/invalid_from_utf8.rs:44:9
49+
|
50+
LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
51+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
52+
| |
53+
| the literal was valid UTF-8 up to the 2 bytes
54+
55+
warning: 6 warnings emitted
56+

0 commit comments

Comments
 (0)