Skip to content

Commit 6efb929

Browse files
committed
Implement normalize lexically
1 parent 2cd3783 commit 6efb929

File tree

2 files changed

+129
-1
lines changed

2 files changed

+129
-1
lines changed

library/std/src/path.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,6 +2154,13 @@ pub struct Path {
21542154
#[stable(since = "1.7.0", feature = "strip_prefix")]
21552155
pub struct StripPrefixError(());
21562156

2157+
/// An error returned from [`Path::normalize_lexically`] if a `..` parent reference
2158+
/// would escape the path.
2159+
#[unstable(feature = "normalize_lexically", issue = "134694")]
2160+
#[derive(Debug, PartialEq)]
2161+
#[non_exhaustive]
2162+
pub struct NormalizeError;
2163+
21572164
impl Path {
21582165
// The following (private!) function allows construction of a path from a u8
21592166
// slice, which is only safe when it is known to follow the OsStr encoding.
@@ -2961,6 +2968,64 @@ impl Path {
29612968
fs::canonicalize(self)
29622969
}
29632970

2971+
/// Normalize a path, including `..` without traversing the filesystem.
2972+
///
2973+
/// Returns an error if normalization would leave leading `..` components.
2974+
///
2975+
/// <div class="warning">
2976+
///
2977+
/// This function always resolves `..` to the "lexical" parent.
2978+
/// That is "a/b/../c" will always resolve to `a/c` which can change the meaning of the path.
2979+
/// In particular, `a/c` and `a/b/../c` are distinct on many systems because `b` may be a symbolic link, so its parent isn’t `a`.
2980+
///
2981+
/// </div>
2982+
///
2983+
/// [`path::absolute`](absolute) is an alternative that preserves `..`.
2984+
/// Or [`Path::canonicalize`] can be used to resolve any `..` by querying the filesystem.
2985+
#[unstable(feature = "normalize_lexically", issue = "134694")]
2986+
pub fn normalize_lexically(&self) -> Result<PathBuf, NormalizeError> {
2987+
let mut lexical = PathBuf::new();
2988+
let mut iter = self.components().peekable();
2989+
2990+
// Find the root, if any.
2991+
let root = match iter.peek() {
2992+
Some(Component::ParentDir) => return Err(NormalizeError),
2993+
Some(p @ Component::RootDir) | Some(p @ Component::CurDir) => {
2994+
lexical.push(p);
2995+
iter.next();
2996+
lexical.as_os_str().len()
2997+
}
2998+
Some(Component::Prefix(prefix)) => {
2999+
lexical.push(prefix.as_os_str());
3000+
iter.next();
3001+
if let Some(p @ Component::RootDir) = iter.peek() {
3002+
lexical.push(p);
3003+
iter.next();
3004+
}
3005+
lexical.as_os_str().len()
3006+
}
3007+
None => return Ok(PathBuf::new()),
3008+
Some(Component::Normal(_)) => 0,
3009+
};
3010+
3011+
for component in iter {
3012+
match component {
3013+
Component::RootDir => unreachable!(),
3014+
Component::Prefix(_) => return Err(NormalizeError),
3015+
Component::CurDir => continue,
3016+
Component::ParentDir => {
3017+
if lexical.as_os_str().len() == root {
3018+
return Err(NormalizeError);
3019+
} else {
3020+
lexical.pop();
3021+
}
3022+
}
3023+
Component::Normal(path) => lexical.push(path),
3024+
}
3025+
}
3026+
Ok(lexical)
3027+
}
3028+
29643029
/// Reads a symbolic link, returning the file that the link points to.
29653030
///
29663031
/// This is an alias to [`fs::read_link`].
@@ -3502,6 +3567,15 @@ impl Error for StripPrefixError {
35023567
}
35033568
}
35043569

3570+
#[unstable(feature = "normalize_lexically", issue = "134694")]
3571+
impl fmt::Display for NormalizeError {
3572+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3573+
f.write_str("parent reference `..` points outside of base directory")
3574+
}
3575+
}
3576+
#[unstable(feature = "normalize_lexically", issue = "134694")]
3577+
impl Error for NormalizeError {}
3578+
35053579
/// Makes the path absolute without accessing the filesystem.
35063580
///
35073581
/// If the path is relative, the current directory is used as the base directory.

library/std/tests/path.rs

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
path_add_extension,
44
path_file_prefix,
55
maybe_uninit_slice,
6-
os_string_pathbuf_leak
6+
os_string_pathbuf_leak,
7+
normalize_lexically
78
)]
89

910
use std::clone::CloneToUninit;
@@ -1976,3 +1977,56 @@ fn clone_to_uninit() {
19761977
unsafe { a.clone_to_uninit(ptr::from_mut::<Path>(&mut b).cast()) };
19771978
assert_eq!(a, &*b);
19781979
}
1980+
1981+
#[test]
fn normalize_lexically() {
    // Asserts that `input` normalizes successfully to `expected`.
    #[track_caller]
    fn expect_normalized(input: &str, expected: &str) {
        let normalized = Path::new(input).normalize_lexically().unwrap();
        assert_eq!(normalized, PathBuf::from(expected));
    }

    // Asserts that normalizing `input` is reported as an error.
    #[track_caller]
    fn expect_rejected(input: &str) {
        let outcome = Path::new(input).normalize_lexically();
        assert!(outcome.is_err());
    }

    // Relative paths
    expect_normalized("a", "a");
    expect_normalized("./a", "./a");
    expect_normalized("a/b/c", "a/b/c");
    expect_normalized("a/././b/./c/.", "a/b/c");
    expect_normalized("a/../c", "c");
    expect_normalized("./a/b", "./a/b");
    expect_normalized("a/../b/c/..", "b");

    expect_rejected("..");
    expect_rejected("../..");
    expect_rejected("a/../..");
    expect_rejected("a/../../b");
    expect_rejected("a/../../b/c");
    expect_rejected("a/../b/../..");

    // Check we don't escape the root or prefix
    #[cfg(unix)]
    {
        expect_rejected("/..");
        expect_rejected("/a/../..");
    }
    #[cfg(windows)]
    {
        expect_rejected(r"C:\..");
        expect_rejected(r"C:\a\..\..");

        expect_rejected(r"C:..");
        expect_rejected(r"C:a\..\..");

        expect_rejected(r"\\server\share\..");
        expect_rejected(r"\\server\share\a\..\..");

        expect_rejected(r"\..");
        expect_rejected(r"\a\..\..");

        expect_rejected(r"\\?\UNC\server\share\..");
        expect_rejected(r"\\?\UNC\server\share\a\..\..");
    }
}

0 commit comments

Comments
 (0)