diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 5b2199c2b7fa4..5e73710f7a611 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1474,145 +1474,78 @@ mod remove_dir_impl { pub use crate::sys_common::fs::remove_dir_all; } -// Dynamically choose implementation Macos x86-64: modern for 10.10+, fallback for older versions -#[cfg(all(target_os = "macos", target_arch = "x86_64"))] +// Modern implementation using openat(), unlinkat() and fdopendir() +#[cfg(not(any(target_os = "redox", target_os = "espidf")))] mod remove_dir_impl { - use super::{cstr, lstat, Dir, InnerReadDir, ReadDir}; - use crate::ffi::CStr; + use super::{cstr, lstat, Dir, DirEntry, InnerReadDir, ReadDir}; + use crate::ffi::{CStr, CString}; use crate::io; + use crate::mem; use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; - use crate::os::unix::prelude::{OwnedFd, RawFd}; + use crate::os::unix::prelude::{AsFd, BorrowedFd, OwnedFd, RawFd}; use crate::path::{Path, PathBuf}; use crate::sync::Arc; - use crate::sys::weak::weak; use crate::sys::{cvt, cvt_r}; - use libc::{c_char, c_int, DIR}; - - pub fn openat_nofollow_dironly(parent_fd: Option<RawFd>, p: &CStr) -> io::Result<OwnedFd> { - weak!(fn openat(c_int, *const c_char, c_int) -> c_int); - let fd = cvt_r(|| unsafe { - openat.get().unwrap()( - parent_fd.unwrap_or(libc::AT_FDCWD), - p.as_ptr(), - libc::O_CLOEXEC | libc::O_RDONLY | libc::O_NOFOLLOW | libc::O_DIRECTORY, - ) - })?; - Ok(unsafe { OwnedFd::from_raw_fd(fd) }) - } + use alloc::collections::VecDeque; + use libc::dev_t; - fn fdreaddir(dir_fd: OwnedFd) -> io::Result<(ReadDir, RawFd)> { - weak!(fn fdopendir(c_int) -> *mut DIR, "fdopendir$INODE64"); - let ptr = unsafe { fdopendir.get().unwrap()(dir_fd.as_raw_fd()) }; - if ptr.is_null() { - return Err(io::Error::last_os_error()); - } - let dirp = Dir(ptr); - // file descriptor is automatically closed by libc::closedir() now, so give up ownership - let new_parent_fd = dir_fd.into_raw_fd(); - // a
valid root is not needed because we do not call any functions involving the full path - // of the DirEntrys. - let dummy_root = PathBuf::new(); - Ok(( - ReadDir { - inner: Arc::new(InnerReadDir { dirp, root: dummy_root }), - end_of_stream: false, - }, - new_parent_fd, - )) - } - - fn remove_dir_all_recursive(parent_fd: Option<RawFd>, p: &Path) -> io::Result<()> { - weak!(fn unlinkat(c_int, *const c_char, c_int) -> c_int); - - let pcstr = cstr(p)?; + #[cfg(not(all(target_os = "macos", target_arch = "x86_64"),))] + use libc::{fdopendir, openat, unlinkat}; - // entry is expected to be a directory, open as such - let fd = openat_nofollow_dironly(parent_fd, &pcstr)?; + #[cfg(all(target_os = "macos", target_arch = "x86_64"))] + mod macos_weak { + use crate::sys::weak::weak; + use libc::{c_char, c_int, DIR}; - // open the directory passing ownership of the fd - let (dir, fd) = fdreaddir(fd)?; - for child in dir { - let child = child?; - match child.entry.d_type { - libc::DT_DIR => { - remove_dir_all_recursive(Some(fd), Path::new(&child.file_name()))?; - } - libc::DT_UNKNOWN => { - match cvt(unsafe { unlinkat.get().unwrap()(fd, child.name_cstr().as_ptr(), 0) }) - { - // type unknown - try to unlink - Err(err) if err.raw_os_error() == Some(libc::EPERM) => { - // if the file is a directory unlink fails with EPERM - remove_dir_all_recursive(Some(fd), Path::new(&child.file_name()))?; - } - result => { - result?; - } - } - } - _ => { - // not a directory -> unlink - cvt(unsafe { unlinkat.get().unwrap()(fd, child.name_cstr().as_ptr(), 0) })?; - } - } + pub unsafe fn openat(dirfd: c_int, pathname: *const c_char, flags: c_int) -> c_int { + weak!(fn openat(c_int, *const c_char, c_int) -> c_int); + openat.get().unwrap()(dirfd, pathname, flags) } - // unlink the directory after removing its contents - cvt(unsafe { - unlinkat.get().unwrap()( - parent_fd.unwrap_or(libc::AT_FDCWD), - pcstr.as_ptr(), - libc::AT_REMOVEDIR, - ) - })?; - Ok(()) - } + pub unsafe fn fdopendir(fd: c_int) -> *mut DIR
{ + weak!(fn fdopendir(c_int) -> *mut DIR, "fdopendir$INODE64"); + fdopendir.get().unwrap()(fd) + } - fn remove_dir_all_modern(p: &Path) -> io::Result<()> { - // We cannot just call remove_dir_all_recursive() here because that would not delete a passed - // symlink. No need to worry about races, because remove_dir_all_recursive() does not recurse - // into symlinks. - let attr = lstat(p)?; - if attr.file_type().is_symlink() { - crate::fs::remove_file(p) - } else { - remove_dir_all_recursive(None, p) + pub unsafe fn unlinkat(dirfd: c_int, pathname: *const c_char, flags: c_int) -> c_int { + weak!(fn unlinkat(c_int, *const c_char, c_int) -> c_int); + unlinkat.get().unwrap()(dirfd, pathname, flags) } - } - pub fn remove_dir_all(p: &Path) -> io::Result<()> { - weak!(fn openat(c_int, *const c_char, c_int) -> c_int); - if openat.get().is_some() { - // openat() is available with macOS 10.10+, just like unlinkat() and fdopendir() - remove_dir_all_modern(p) - } else { - // fall back to classic implementation - crate::sys_common::fs::remove_dir_all(p) + pub fn has_openat() -> bool { + weak!(fn openat(c_int, *const c_char, c_int) -> c_int); + openat.get().is_some() } } -} -// Modern implementation using openat(), unlinkat() and fdopendir() -#[cfg(not(any( - all(target_os = "macos", target_arch = "x86_64"), - target_os = "redox", - target_os = "espidf" -)))] -mod remove_dir_impl { - use super::{cstr, lstat, Dir, DirEntry, InnerReadDir, ReadDir}; - use crate::ffi::CStr; - use crate::io; - use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; - use crate::os::unix::prelude::{OwnedFd, RawFd}; - use crate::path::{Path, PathBuf}; - use crate::sync::Arc; - use crate::sys::{cvt, cvt_r}; - use libc::{fdopendir, openat, unlinkat}; + #[cfg(all(target_os = "macos", target_arch = "x86_64"))] + use macos_weak::{fdopendir, openat, unlinkat}; - pub fn openat_nofollow_dironly(parent_fd: Option<RawFd>, p: &CStr) -> io::Result<OwnedFd> { + #[cfg(not(any( + target_os = "linux", + target_os = "emscripten", +
target_os = "l4re", + target_os = "android" + )))] + use libc::fstat as fstat64; + #[cfg(any( + target_os = "linux", + target_os = "emscripten", + target_os = "l4re", + target_os = "android" + ))] + use libc::fstat64; + + // VecDeque allocates space for 2^n elements if the capacity is 2^n-1. + const MAX_OPEN_FDS: usize = 31; + + pub fn openat_nofollow_dironly( + parent_fd: Option<BorrowedFd<'_>>, + p: &CStr, + ) -> io::Result<OwnedFd> { let fd = cvt_r(|| unsafe { openat( - parent_fd.unwrap_or(libc::AT_FDCWD), + parent_fd.map(|fd| fd.as_raw_fd()).unwrap_or(libc::AT_FDCWD), p.as_ptr(), libc::O_CLOEXEC | libc::O_RDONLY | libc::O_NOFOLLOW | libc::O_DIRECTORY, ) @@ -1620,34 +1553,6 @@ mod remove_dir_impl { Ok(unsafe { OwnedFd::from_raw_fd(fd) }) } - fn fdreaddir(dir_fd: OwnedFd) -> io::Result<(ReadDir, RawFd)> { - let ptr = unsafe { fdopendir(dir_fd.as_raw_fd()) }; - if ptr.is_null() { - return Err(io::Error::last_os_error()); - } - let dirp = Dir(ptr); - // file descriptor is automatically closed by libc::closedir() now, so give up ownership - let new_parent_fd = dir_fd.into_raw_fd(); - // a valid root is not needed because we do not call any functions involving the full path - // of the DirEntrys.
- let dummy_root = PathBuf::new(); - Ok(( - ReadDir { - inner: Arc::new(InnerReadDir { dirp, root: dummy_root }), - #[cfg(not(any( - target_os = "android", - target_os = "linux", - target_os = "solaris", - target_os = "illumos", - target_os = "fuchsia", - target_os = "redox", - )))] - end_of_stream: false, - }, - new_parent_fd, - )) - } - #[cfg(any( target_os = "solaris", target_os = "illumos", @@ -1672,55 +1577,264 @@ mod remove_dir_impl { } } - fn remove_dir_all_recursive(parent_fd: Option<RawFd>, p: &Path) -> io::Result<()> { - let pcstr = cstr(p)?; - - // entry is expected to be a directory, open as such - let fd = openat_nofollow_dironly(parent_fd, &pcstr)?; - - // open the directory passing ownership of the fd - let (dir, fd) = fdreaddir(fd)?; - for child in dir { - let child = child?; - match is_dir(&child) { - Some(true) => { - remove_dir_all_recursive(Some(fd), Path::new(&child.file_name()))?; - } - Some(false) => { - cvt(unsafe { unlinkat(fd, child.name_cstr().as_ptr(), 0) })?; - } - None => match cvt(unsafe { unlinkat(fd, child.name_cstr().as_ptr(), 0) }) { + fn unlink_direntry(ent: &DirEntry, parent_fd: BorrowedFd<'_>) -> io::Result<bool> { + match is_dir(ent) { + Some(true) => Ok(false), + Some(false) => { + cvt(unsafe { unlinkat(parent_fd.as_raw_fd(), ent.name_cstr().as_ptr(), 0) })?; + Ok(true) + } + None => { + match cvt(unsafe { unlinkat(parent_fd.as_raw_fd(), ent.name_cstr().as_ptr(), 0) }) { // type unknown - try to unlink Err(err) if err.raw_os_error() == Some(libc::EISDIR) || err.raw_os_error() == Some(libc::EPERM) => { - // if the file is a directory unlink fails with EISDIR on Linux and EPERM everyhwere else - remove_dir_all_recursive(Some(fd), Path::new(&child.file_name()))?; + // if the file is a directory unlink fails with EISDIR on Linux + // and EPERM everywhere else + Ok(false) + } + result => result.map(|_| true), + } + } + } + } + + enum LazyReadDir { + Fd(Option<OwnedFd>), // never None except after fdopendir() fails in ensure_open() + OpenReadDir(ReadDir, RawFd), + } + +
impl LazyReadDir { + fn from_path(path: &CStr) -> io::Result<LazyReadDir> { + let fd = openat_nofollow_dironly(None, path)?; + Ok(LazyReadDir::Fd(Some(fd))) + } + + fn get_child(&self, child_name: &CStr) -> io::Result<LazyReadDir> { + let fd = openat_nofollow_dironly(Some(self.as_fd()), child_name)?; + Ok(LazyReadDir::Fd(Some(fd))) + } + + fn get_parent(&self) -> io::Result<LazyReadDir> { + let fd = openat_nofollow_dironly(Some(self.as_fd()), unsafe { + CStr::from_bytes_with_nul_unchecked(b"..\0") + })?; + Ok(LazyReadDir::Fd(Some(fd))) + } + + fn ensure_open(&mut self) -> io::Result<()> { + if let LazyReadDir::Fd(fd_opt) = self { + let fd = fd_opt.take().unwrap(); + let ptr = unsafe { fdopendir(fd.as_raw_fd()) }; + if ptr.is_null() { + return Err(io::Error::last_os_error()); + } + let dirp = Dir(ptr); + // file descriptor is automatically closed by libc::closedir() now, so give up ownership + let new_parent_fd = fd.into_raw_fd(); + // a valid root is not needed because we do not call any functions involving the full path + // of the DirEntrys.
+ let dummy_root = PathBuf::new(); + *self = LazyReadDir::OpenReadDir( + ReadDir { + inner: Arc::new(InnerReadDir { dirp, root: dummy_root }), + #[cfg(not(any( + target_os = "android", + target_os = "linux", + target_os = "solaris", + target_os = "illumos", + target_os = "fuchsia", + target_os = "redox", + )))] + end_of_stream: false, + }, + new_parent_fd, + ); + } + Ok(()) + } + + fn is_open(&self) -> bool { + match self { + LazyReadDir::OpenReadDir(_, _) => true, + _ => false, + } + } + } + + impl AsFd for LazyReadDir { + fn as_fd(&self) -> BorrowedFd<'_> { + match self { + LazyReadDir::Fd(Some(fd)) => fd.as_fd(), + LazyReadDir::Fd(None) => { + panic!("LazyReadDir::as_fd() called, but no fd present") + } + LazyReadDir::OpenReadDir(_, fd) => unsafe { BorrowedFd::borrow_raw_fd(*fd) }, + } + } + } + + impl Iterator for LazyReadDir { + type Item = io::Result<DirEntry>; + + fn next(&mut self) -> Option<io::Result<DirEntry>> { + if let Err(err) = self.ensure_open() { + return Some(Err(err)); + } + match self { + LazyReadDir::OpenReadDir(rd, _) => rd.next(), + _ => { + unreachable!(); + } + } + } + } + + struct DirComponent { + name: CString, + dev: dev_t, + ino: u64, + } + + impl DirComponent { + fn new(name: &CStr, fd: BorrowedFd<'_>) -> io::Result<DirComponent> { + let mut stat = unsafe { mem::zeroed() }; + cvt(unsafe { fstat64(fd.as_raw_fd(), &mut stat) })?; + Ok(DirComponent { name: name.to_owned(), dev: stat.st_dev, ino: stat.st_ino }) + } + + fn verify_dev_ino(&self, fd: BorrowedFd<'_>) -> io::Result<()> { + let mut stat = unsafe { mem::zeroed() }; + cvt(unsafe { fstat64(fd.as_raw_fd(), &mut stat) })?; + // Make sure that the reopened directory has the same inode as when we visited it descending + // the directory tree. More detailed risk analysis TBD.
+ if self.dev != stat.st_dev || self.ino != stat.st_ino { + return Err(io::Error::new( + io::ErrorKind::Uncategorized, + "directory inode does not match", + )); + } + Ok(()) + } + } + + fn remove_dir_all_loop(root: &Path) -> io::Result<()> { + // all ancestor names and inodes from the deletion root directory to the parent of the currently processed + // directory + let mut parent_dir_components = Vec::new(); + // cache of up to MAX_OPEN_FDS ancestor ReadDirs and associated file descriptors + let mut readdir_cache = VecDeque::with_capacity(MAX_OPEN_FDS); + // the directory name, inode pair and ReadDir currently being processed + let mut current_readdir = LazyReadDir::from_path(&cstr(root)?)?; + let mut current_dir_component = DirComponent::new( + unsafe { CStr::from_bytes_with_nul_unchecked(b"\0") }, + current_readdir.as_fd(), + )?; + let root_parent_component = DirComponent::new( + unsafe { CStr::from_bytes_with_nul_unchecked(b"\0") }, + current_readdir.get_parent()?.as_fd(), + )?; + loop { + while let Some(child) = current_readdir.next() { + let child = child?; + if !unlink_direntry(&child, current_readdir.as_fd())? { + // Descend into this child directory + + let child_readdir = current_readdir.get_child(child.name_cstr())?; + let child_dir_compoment = + DirComponent::new(child.name_cstr(), child_readdir.as_fd())?; + parent_dir_components.push(current_dir_component); + + // avoid growing the cache over capacity + if readdir_cache.len() == readdir_cache.capacity() { + readdir_cache.pop_front(); } - result => { - result?; + readdir_cache.push_back(current_readdir); + + current_readdir = child_readdir; + current_dir_component = child_dir_compoment; + } + } + + match parent_dir_components.pop() { + Some(parent) => { + // Going back up... 
+ + // Get parent directory readdir + let parent_readdir = match readdir_cache.pop_back() { + Some(readdir) => readdir, + None => { + // cache is empty - reopen parent + let parent_readdir = current_readdir.get_parent()?; + parent.verify_dev_ino(parent_readdir.as_fd())?; + parent_readdir + } + }; + + // Remove now empty directory + cvt(unsafe { + unlinkat( + parent_readdir.as_fd().as_raw_fd(), + current_dir_component.name.as_ptr(), + libc::AT_REMOVEDIR, + ) + })?; + + current_dir_component = parent; + current_readdir = parent_readdir; + + // If we don't have readdir open for the current directory that means we got the file descriptor + // via openat(dirfd, ".."). To make sure that the previous child directory was not moved + // somewhere else and its parent just happens to have the same reused (dev, inode) + // pair that we found descending, we check the parent directory (dev, inode) as well. + if !current_readdir.is_open() && readdir_cache.is_empty() { + if let Some(parent) = parent_dir_components.last() { + let parent_readdir = current_readdir.get_parent()?; + parent.verify_dev_ino(parent_readdir.as_fd())?; + readdir_cache.push_back(parent_readdir); + } else { + // verify parent of the root directory + let parent_readdir = current_readdir.get_parent()?; + root_parent_component.verify_dev_ino(parent_readdir.as_fd())?; + } } - }, + } + None => break, } } - // unlink the directory after removing its contents - cvt(unsafe { - unlinkat(parent_fd.unwrap_or(libc::AT_FDCWD), pcstr.as_ptr(), libc::AT_REMOVEDIR) - })?; + // unlink root dir + cvt(unsafe { unlinkat(libc::AT_FDCWD, cstr(root)?.as_ptr(), libc::AT_REMOVEDIR) })?; Ok(()) } - pub fn remove_dir_all(p: &Path) -> io::Result<()> { - // We cannot just call remove_dir_all_recursive() here because that would not delete a passed - // symlink.
No need to worry about races, because remove_dir_all_recursive() does not recurse + fn remove_dir_all_modern(p: &Path) -> io::Result<()> { + // We cannot just call remove_dir_all_loop() here because that would not delete a passed + // symlink. No need to worry about races, because remove_dir_all_loop() does not descend // into symlinks. let attr = lstat(p)?; if attr.file_type().is_symlink() { crate::fs::remove_file(p) } else { - remove_dir_all_recursive(None, p) + remove_dir_all_loop(p) + } + } + + #[cfg(not(all(target_os = "macos", target_arch = "x86_64")))] + pub fn remove_dir_all(p: &Path) -> io::Result<()> { + remove_dir_all_modern(p) + } + + #[cfg(all(target_os = "macos", target_arch = "x86_64"))] + pub fn remove_dir_all(p: &Path) -> io::Result<()> { + if macos_weak::has_openat() { + // openat() is available with macOS 10.10+, just like unlinkat() and fdopendir() + remove_dir_all_modern(p) + } else { + // fall back to classic implementation + crate::sys_common::fs::remove_dir_all(p) } } } diff --git a/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs new file mode 100644 index 0000000000000..47d9194504667 --- /dev/null +++ b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs @@ -0,0 +1,36 @@ +// run-pass + +use std::env::{current_dir, set_current_dir}; +use std::fs::{create_dir, remove_dir_all, File}; +use std::path::Path; + +pub fn main() { + let saved_cwd = current_dir().unwrap(); + if !Path::exists(Path::new("tmpdir")) { + create_dir("tmpdir").unwrap(); + } + set_current_dir("tmpdir").unwrap(); + let depth = if cfg!(target_os = "linux") { + // Should work on all Linux filesystems. + 4096 + } else if cfg!(target_os = "macos") { + // On Macos increasing depth leads to a superlinear slowdown. + 1024 + } else if cfg!(unix) { + // Should be no problem on other UNIXes either. + 1024 + } else { + // "Safe" fallback for other platforms. 
+ 64 + }; + for _ in 0..depth { + if !Path::exists(Path::new("a")) { + create_dir("empty_dir").unwrap(); + File::create("empty_file").unwrap(); + create_dir("a").unwrap(); + } + set_current_dir("a").unwrap(); + } + set_current_dir(saved_cwd).unwrap(); + remove_dir_all("tmpdir").unwrap(); +}