From 8788da12a61943ea0a6518fb3ba9e7529abcb96c Mon Sep 17 00:00:00 2001 From: Chris Denton Date: Mon, 23 Dec 2024 15:09:51 +0000 Subject: [PATCH] Implement normalize lexically --- library/std/src/path.rs | 77 +++++++++++++++++++++++++++++++++++++++ library/std/tests/path.rs | 56 +++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 1 deletion(-) diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 7959c63385816..2cdded1dfcf99 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -2154,6 +2154,13 @@ pub struct Path { #[stable(since = "1.7.0", feature = "strip_prefix")] pub struct StripPrefixError(()); +/// An error returned from [`Path::normalize_lexically`] if a `..` parent reference +/// would escape the path. +#[unstable(feature = "normalize_lexically", issue = "134694")] +#[derive(Debug, PartialEq)] +#[non_exhaustive] +pub struct NormalizeError; + impl Path { // The following (private!) function allows construction of a path from a u8 // slice, which is only safe when it is known to follow the OsStr encoding. @@ -2961,6 +2968,67 @@ impl Path { fs::canonicalize(self) } + /// Normalize a path, including `..` without traversing the filesystem. + /// + /// Returns an error if normalization would leave leading `..` components. + /// + ///
+ /// + /// This function always resolves `..` to the "lexical" parent. + /// That is "a/b/../c" will always resolve to `a/c` which can change the meaning of the path. + /// In particular, `a/c` and `a/b/../c` are distinct on many systems because `b` may be a symbolic link, so its parent isn’t `a`. + /// + ///
+ /// + /// [`path::absolute`](absolute) is an alternative that preserves `..`. + /// Or [`Path::canonicalize`] can be used to resolve any `..` by querying the filesystem. + #[unstable(feature = "normalize_lexically", issue = "134694")] + pub fn normalize_lexically(&self) -> Result { + let mut lexical = PathBuf::new(); + let mut iter = self.components().peekable(); + + // Find the root, if any, and add it to the lexical path. + // Here we treat the Windows path "C:\" as a single "root" even though + // `components` splits it into two: (Prefix, RootDir). + let root = match iter.peek() { + Some(Component::ParentDir) => return Err(NormalizeError), + Some(p @ Component::RootDir) | Some(p @ Component::CurDir) => { + lexical.push(p); + iter.next(); + lexical.as_os_str().len() + } + Some(Component::Prefix(prefix)) => { + lexical.push(prefix.as_os_str()); + iter.next(); + if let Some(p @ Component::RootDir) = iter.peek() { + lexical.push(p); + iter.next(); + } + lexical.as_os_str().len() + } + None => return Ok(PathBuf::new()), + Some(Component::Normal(_)) => 0, + }; + + for component in iter { + match component { + Component::RootDir => unreachable!(), + Component::Prefix(_) => return Err(NormalizeError), + Component::CurDir => continue, + Component::ParentDir => { + // It's an error if ParentDir causes us to go above the "root". + if lexical.as_os_str().len() == root { + return Err(NormalizeError); + } else { + lexical.pop(); + } + } + Component::Normal(path) => lexical.push(path), + } + } + Ok(lexical) + } + /// Reads a symbolic link, returning the file that the link points to. /// /// This is an alias to [`fs::read_link`]. @@ -3502,6 +3570,15 @@ impl Error for StripPrefixError { } } +#[unstable(feature = "normalize_lexically", issue = "134694")] +impl fmt::Display for NormalizeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("parent reference `..` points outside of base directory") + } +} +#[unstable(feature = "normalize_lexically", issue = "134694")] +impl Error for NormalizeError {} + /// Makes the path absolute without accessing the filesystem. /// /// If the path is relative, the current directory is used as the base directory. diff --git a/library/std/tests/path.rs b/library/std/tests/path.rs index 978402b6fdaea..821a605ed509e 100644 --- a/library/std/tests/path.rs +++ b/library/std/tests/path.rs @@ -3,7 +3,8 @@ path_add_extension, path_file_prefix, maybe_uninit_slice, - os_string_pathbuf_leak + os_string_pathbuf_leak, + normalize_lexically )] use std::clone::CloneToUninit; @@ -1976,3 +1977,56 @@ fn clone_to_uninit() { unsafe { a.clone_to_uninit(ptr::from_mut::(&mut b).cast()) }; assert_eq!(a, &*b); } + +#[test] +fn normalize_lexically() { + #[track_caller] + fn check_ok(a: &str, b: &str) { + assert_eq!(Path::new(a).normalize_lexically().unwrap(), PathBuf::from(b)); + } + + #[track_caller] + fn check_err(a: &str) { + assert!(Path::new(a).normalize_lexically().is_err()); + } + + // Relative paths + check_ok("a", "a"); + check_ok("./a", "./a"); + check_ok("a/b/c", "a/b/c"); + check_ok("a/././b/./c/.", "a/b/c"); + check_ok("a/../c", "c"); + check_ok("./a/b", "./a/b"); + check_ok("a/../b/c/..", "b"); + + check_err(".."); + check_err("../.."); + check_err("a/../.."); + check_err("a/../../b"); + check_err("a/../../b/c"); + check_err("a/../b/../.."); + + // Check we don't escape the root or prefix + #[cfg(unix)] + { + check_err("/.."); + check_err("/a/../.."); + } + #[cfg(windows)] + { + check_err(r"C:\.."); + check_err(r"C:\a\..\.."); + + check_err(r"C:.."); + check_err(r"C:a\..\.."); + + check_err(r"\\server\share\.."); + check_err(r"\\server\share\a\..\.."); + + check_err(r"\.."); + check_err(r"\a\..\.."); + + check_err(r"\\?\UNC\server\share\.."); + check_err(r"\\?\UNC\server\share\a\..\.."); + } +}