Skip to content

Commit 532ed65

Browse files
authored
Rollup merge of rust-lang#134696 - ChrisDenton:normalize-lexically, r=workingjubilee
Implement `normalize_lexically`. Implements rust-lang#134694. This is, I think, the most straightforward implementation I could do, which will hopefully make it easier to experiment if we decide to change the design here.
2 parents 283db70 + c299e29 commit 532ed65

File tree

2 files changed

+132
-1
lines changed

2 files changed

+132
-1
lines changed

library/std/src/path.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,6 +2154,13 @@ pub struct Path {
21542154
#[stable(since = "1.7.0", feature = "strip_prefix")]
21552155
pub struct StripPrefixError(());
21562156

2157+
/// An error returned from [`Path::normalize_lexically`] if a `..` parent reference
2158+
/// would escape the path, i.e. climb above its root, its prefix, or the start of
/// a relative path.
2159+
#[unstable(feature = "normalize_lexically", issue = "134694")]
2160+
#[derive(Debug, PartialEq)]
2161+
#[non_exhaustive]
2162+
pub struct NormalizeError;
2163+
21572164
impl Path {
21582165
// The following (private!) function allows construction of a path from a u8
21592166
// slice, which is only safe when it is known to follow the OsStr encoding.
@@ -2961,6 +2968,67 @@ impl Path {
29612968
fs::canonicalize(self)
29622969
}
29632970

2971+
/// Normalize a path, including `..` without traversing the filesystem.
2972+
///
2973+
/// Returns an error if normalization would leave leading `..` components.
2974+
///
2975+
/// <div class="warning">
2976+
///
2977+
/// This function always resolves `..` to the "lexical" parent.
2978+
/// That is "a/b/../c" will always resolve to `a/c` which can change the meaning of the path.
2979+
/// In particular, `a/c` and `a/b/../c` are distinct on many systems because `b` may be a symbolic link, so its parent isn’t `a`.
2980+
///
2981+
/// </div>
2982+
///
2983+
/// [`path::absolute`](absolute) is an alternative that preserves `..`.
2984+
/// Or [`Path::canonicalize`] can be used to resolve any `..` by querying the filesystem.
2985+
#[unstable(feature = "normalize_lexically", issue = "134694")]
2986+
pub fn normalize_lexically(&self) -> Result<PathBuf, NormalizeError> {
2987+
let mut lexical = PathBuf::new();
2988+
let mut iter = self.components().peekable();
2989+
2990+
// Find the root, if any, and add it to the lexical path.
2991+
// Here we treat the Windows path "C:\" as a single "root" even though
2992+
// `components` splits it into two: (Prefix, RootDir).
2993+
let root = match iter.peek() {
2994+
Some(Component::ParentDir) => return Err(NormalizeError),
2995+
Some(p @ Component::RootDir) | Some(p @ Component::CurDir) => {
2996+
lexical.push(p);
2997+
iter.next();
2998+
lexical.as_os_str().len()
2999+
}
3000+
Some(Component::Prefix(prefix)) => {
3001+
lexical.push(prefix.as_os_str());
3002+
iter.next();
3003+
if let Some(p @ Component::RootDir) = iter.peek() {
3004+
lexical.push(p);
3005+
iter.next();
3006+
}
3007+
lexical.as_os_str().len()
3008+
}
3009+
None => return Ok(PathBuf::new()),
3010+
Some(Component::Normal(_)) => 0,
3011+
};
3012+
3013+
for component in iter {
3014+
match component {
3015+
Component::RootDir => unreachable!(),
3016+
Component::Prefix(_) => return Err(NormalizeError),
3017+
Component::CurDir => continue,
3018+
Component::ParentDir => {
3019+
// It's an error if ParentDir causes us to go above the "root".
3020+
if lexical.as_os_str().len() == root {
3021+
return Err(NormalizeError);
3022+
} else {
3023+
lexical.pop();
3024+
}
3025+
}
3026+
Component::Normal(path) => lexical.push(path),
3027+
}
3028+
}
3029+
Ok(lexical)
3030+
}
3031+
29643032
/// Reads a symbolic link, returning the file that the link points to.
29653033
///
29663034
/// This is an alias to [`fs::read_link`].
@@ -3502,6 +3570,15 @@ impl Error for StripPrefixError {
35023570
}
35033571
}
35043572

3573+
#[unstable(feature = "normalize_lexically", issue = "134694")]
3574+
impl fmt::Display for NormalizeError {
3575+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3576+
f.write_str("parent reference `..` points outside of base directory")
3577+
}
3578+
}
3579+
// `NormalizeError` is a unit struct, so the empty impl simply relies on the
// `Error` trait's default methods.
#[unstable(feature = "normalize_lexically", issue = "134694")]
3580+
impl Error for NormalizeError {}
3581+
35053582
/// Makes the path absolute without accessing the filesystem.
35063583
///
35073584
/// If the path is relative, the current directory is used as the base directory.

library/std/tests/path.rs

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
path_add_extension,
44
path_file_prefix,
55
maybe_uninit_slice,
6-
os_string_pathbuf_leak
6+
os_string_pathbuf_leak,
7+
normalize_lexically
78
)]
89

910
use std::clone::CloneToUninit;
@@ -2007,3 +2008,56 @@ fn test_embedded_newline() {
20072008
assert_eq!(path.file_name(), Some(OsStr::new("foo\nbar")));
20082009
assert_eq!(path.to_str(), Some("foo\nbar"));
20092010
}
2011+
2012+
#[test]
fn normalize_lexically() {
    /// Asserts that `input` normalizes successfully to exactly `expected`.
    #[track_caller]
    fn check_ok(input: &str, expected: &str) {
        let normalized = Path::new(input).normalize_lexically();
        assert_eq!(normalized.unwrap(), PathBuf::from(expected));
    }

    /// Asserts that normalizing `input` is rejected.
    #[track_caller]
    fn check_err(input: &str) {
        let normalized = Path::new(input).normalize_lexically();
        assert!(normalized.is_err());
    }

    // Relative paths that stay within themselves.
    check_ok("a", "a");
    check_ok("./a", "./a");
    check_ok("a/b/c", "a/b/c");
    check_ok("a/././b/./c/.", "a/b/c");
    check_ok("a/../c", "c");
    check_ok("./a/b", "./a/b");
    check_ok("a/../b/c/..", "b");

    // Relative paths whose `..` would climb above the starting point.
    check_err("..");
    check_err("../..");
    check_err("a/../..");
    check_err("a/../../b");
    check_err("a/../../b/c");
    check_err("a/../b/../..");

    // `..` must never escape the root or prefix.
    #[cfg(unix)]
    {
        check_err("/..");
        check_err("/a/../..");
    }
    #[cfg(windows)]
    {
        // Drive with a root.
        check_err(r"C:\..");
        check_err(r"C:\a\..\..");

        // Drive-relative.
        check_err(r"C:..");
        check_err(r"C:a\..\..");

        // UNC share.
        check_err(r"\\server\share\..");
        check_err(r"\\server\share\a\..\..");

        // Root without a prefix.
        check_err(r"\..");
        check_err(r"\a\..\..");

        // Verbatim UNC share.
        check_err(r"\\?\UNC\server\share\..");
        check_err(r"\\?\UNC\server\share\a\..\..");
    }
}

0 commit comments

Comments
 (0)