From c06d25059a3020209f1aa38b731c8851d3244aef Mon Sep 17 00:00:00 2001 From: Demi Obenour Date: Thu, 26 Oct 2017 16:26:54 -0400 Subject: [PATCH] Support long file names on Windows Windows does not support file names of >255 UTF-16 characters, unless the path is prefixed with `\\?\` or `\??\`. Both of those, however, suppress normalization. So Rust must do that normalization itself. This commit makes the standard library do just that, by calling the Windows API function GetFullPathNameW(). --- src/libstd/sys/windows/c.rs | 4 +++ src/libstd/sys/windows/fs.rs | 30 +++++++++---------- src/libstd/sys/windows/mod.rs | 45 ++++++++++++++++++++++++++++ src/libstd/sys/windows/path.rs | 55 ++++++++++++++++++---------------- 4 files changed, 94 insertions(+), 40 deletions(-) diff --git a/src/libstd/sys/windows/c.rs b/src/libstd/sys/windows/c.rs index 39e00270233b4..d783a36677f88 100644 --- a/src/libstd/sys/windows/c.rs +++ b/src/libstd/sys/windows/c.rs @@ -910,6 +910,10 @@ extern "system" { pub fn GetConsoleMode(hConsoleHandle: HANDLE, lpMode: LPDWORD) -> BOOL; + pub fn GetFullPathNameW(lpFileName: LPCWSTR, + nBufferLength: DWORD, + lpBuffer: LPWSTR, + lpFilePart: *mut LPWSTR) -> DWORD; pub fn RemoveDirectoryW(lpPathName: LPCWSTR) -> BOOL; pub fn SetFileAttributesW(lpFileName: LPCWSTR, dwFileAttributes: DWORD) -> BOOL; diff --git a/src/libstd/sys/windows/fs.rs b/src/libstd/sys/windows/fs.rs index ae9535139d997..ef4b9670064c7 100644 --- a/src/libstd/sys/windows/fs.rs +++ b/src/libstd/sys/windows/fs.rs @@ -23,7 +23,7 @@ use sys::time::SystemTime; use sys::{c, cvt}; use sys_common::FromInner; -use super::to_u16s; +use super::to_u16path; pub struct File { handle: Handle } @@ -255,7 +255,7 @@ impl OpenOptions { impl File { pub fn open(path: &Path, opts: &OpenOptions) -> io::Result { - let path = to_u16s(path)?; + let path = to_u16path(path)?; let handle = unsafe { c::CreateFileW(path.as_ptr(), opts.get_access_mode()?, @@ -547,7 +547,7 @@ impl DirBuilder { pub fn new() -> DirBuilder 
{ DirBuilder } pub fn mkdir(&self, p: &Path) -> io::Result<()> { - let p = to_u16s(p)?; + let p = to_u16path(p)?; cvt(unsafe { c::CreateDirectoryW(p.as_ptr(), ptr::null_mut()) })?; @@ -558,7 +558,7 @@ impl DirBuilder { pub fn readdir(p: &Path) -> io::Result { let root = p.to_path_buf(); let star = p.join("*"); - let path = to_u16s(&star)?; + let path = to_u16path(&star)?; unsafe { let mut wfd = mem::zeroed(); @@ -576,14 +576,14 @@ pub fn readdir(p: &Path) -> io::Result { } pub fn unlink(p: &Path) -> io::Result<()> { - let p_u16s = to_u16s(p)?; + let p_u16s = to_u16path(p)?; cvt(unsafe { c::DeleteFileW(p_u16s.as_ptr()) })?; Ok(()) } pub fn rename(old: &Path, new: &Path) -> io::Result<()> { - let old = to_u16s(old)?; - let new = to_u16s(new)?; + let old = to_u16path(old)?; + let new = to_u16path(new)?; cvt(unsafe { c::MoveFileExW(old.as_ptr(), new.as_ptr(), c::MOVEFILE_REPLACE_EXISTING) })?; @@ -591,7 +591,7 @@ pub fn rename(old: &Path, new: &Path) -> io::Result<()> { } pub fn rmdir(p: &Path) -> io::Result<()> { - let p = to_u16s(p)?; + let p = to_u16path(p)?; cvt(unsafe { c::RemoveDirectoryW(p.as_ptr()) })?; Ok(()) } @@ -639,8 +639,8 @@ pub fn symlink(src: &Path, dst: &Path) -> io::Result<()> { } pub fn symlink_inner(src: &Path, dst: &Path, dir: bool) -> io::Result<()> { - let src = to_u16s(src)?; - let dst = to_u16s(dst)?; + let src = to_u16path(src)?; + let dst = to_u16path(dst)?; let flags = if dir { c::SYMBOLIC_LINK_FLAG_DIRECTORY } else { 0 }; // Formerly, symlink creation required the SeCreateSymbolicLink privilege. 
For the Windows 10 // Creators Update, Microsoft loosened this to allow unprivileged symlink creation if the @@ -665,8 +665,8 @@ pub fn symlink_inner(src: &Path, dst: &Path, dir: bool) -> io::Result<()> { } pub fn link(src: &Path, dst: &Path) -> io::Result<()> { - let src = to_u16s(src)?; - let dst = to_u16s(dst)?; + let src = to_u16path(src)?; + let dst = to_u16path(dst)?; cvt(unsafe { c::CreateHardLinkW(dst.as_ptr(), src.as_ptr(), ptr::null_mut()) })?; @@ -693,7 +693,7 @@ pub fn lstat(path: &Path) -> io::Result { } pub fn set_perm(p: &Path, perm: FilePermissions) -> io::Result<()> { - let p = to_u16s(p)?; + let p = to_u16path(p)?; unsafe { cvt(c::SetFileAttributesW(p.as_ptr(), perm.attrs))?; Ok(()) @@ -734,8 +734,8 @@ pub fn copy(from: &Path, to: &Path) -> io::Result { if dwStreamNumber == 1 {*(lpData as *mut i64) = StreamBytesTransferred;} c::PROGRESS_CONTINUE } - let pfrom = to_u16s(from)?; - let pto = to_u16s(to)?; + let pfrom = to_u16path(from)?; + let pto = to_u16path(to)?; let mut size = 0i64; cvt(unsafe { c::CopyFileExW(pfrom.as_ptr(), pto.as_ptr(), Some(callback), diff --git a/src/libstd/sys/windows/mod.rs b/src/libstd/sys/windows/mod.rs index e7a9a121b2519..4875b72da30cd 100644 --- a/src/libstd/sys/windows/mod.rs +++ b/src/libstd/sys/windows/mod.rs @@ -15,6 +15,7 @@ use ffi::{OsStr, OsString}; use io::{self, ErrorKind}; use os::windows::ffi::{OsStrExt, OsStringExt}; use path::PathBuf; +use sys::windows::path::parse_prefix; use time::Duration; #[macro_use] pub mod compat; @@ -93,6 +94,50 @@ pub fn to_u16s>(s: S) -> io::Result> { inner(s.as_ref()) } +// Windows APIs that take a path are especially tricky. For legacy reasons, +// Windows has a limit of 255 characters for a path, unless one prefixes the +// path with \\?\ or \??\. That is too short! We want Rust programs to not +// have this limit. So Rust must add this before calling the relevant Windows +// APIs. 
That, however, disables filename normalization, so Rust must call +// GetFullPathNameW() (which thankfully works on long paths) before passing the +// buffer to the operating system. +pub fn to_u16path>(s: S) -> io::Result> { + fn inner(s: &OsStr) -> io::Result> { + if len(s) >= 4 { + match s[0..4] { + br"\\?\" | br"\??\" => return to_u16s(s), + } + } + let wide = to_u16s(s)?; + + if wide.len() < 256 { + match s.get(1) { + Some(& b':') => match s[0] { + b'a'...b'z' | b'A'...b'Z' => { + // Disk path. Windows will handle this fine. + return Ok(wide) + } + } + Some(& b'\\') => if s[0] == b'\\' { + if s[2..].iter().position(|x| x == b'\\').is_some() { + // Short UNC path. Again, Windows will handle this one fine. + return Ok(wide) + } + } + } + } + + fill_utf16_buf(|ptr, len| unsafe { + // There is a race condition here: if another thread is changing the + // current directory, this could return inconsistent values between + // two calls. Hence we must use fill_utf16_buf() (which uses a + // loop). + c::GetFullPathNameW(wide.as_ptr(), len, ptr, ptr::null_mut()) + }, |slice| [92, 92, 63, 92].chain(slice).chain(Some(0)).collect()) + } + inner(s.as_ref()) +} + // Many Windows APIs follow a pattern of where we hand a buffer and then they // will report back to us how large the buffer should be or how many bytes // currently reside in the buffer. This function is an abstraction over these diff --git a/src/libstd/sys/windows/path.rs b/src/libstd/sys/windows/path.rs index 2b47808451bc2..ee03bd6fe4d8c 100644 --- a/src/libstd/sys/windows/path.rs +++ b/src/libstd/sys/windows/path.rs @@ -40,35 +40,38 @@ pub fn parse_prefix<'a>(path: &'a OsStr) -> Option { // only from ASCII-bounded slices of existing &OsStr values. 
let mut path = os_str_as_u8_slice(path); + let parse_verbatim = |path| { + path = &path[2..]; + if path.starts_with(br"UNC\") { + // \\?\UNC\server\share + path = &path[4..]; + let (server, share) = match parse_two_comps(path, is_verbatim_sep) { + Some((server, share)) => + (u8_slice_as_os_str(server), u8_slice_as_os_str(share)), + None => (u8_slice_as_os_str(path), u8_slice_as_os_str(&[])), + }; + Some(VerbatimUNC(server, share)); + } else { + // \\?\path + let idx = path.iter().position(|&b| b == b'\\'); + if idx == Some(2) && path[1] == b':' { + let c = path[0]; + if c.is_ascii() && (c as char).is_alphabetic() { + // \\?\C:\ path + return Some(VerbatimDisk(c.to_ascii_uppercase())); + } + } + let slice = &path[..idx.unwrap_or(path.len())]; + Some(Verbatim(u8_slice_as_os_str(slice))); + } + } if path.starts_with(br"\\") { // \\ path = &path[2..]; if path.starts_with(br"?\") { // \\?\ - path = &path[2..]; - if path.starts_with(br"UNC\") { - // \\?\UNC\server\share - path = &path[4..]; - let (server, share) = match parse_two_comps(path, is_verbatim_sep) { - Some((server, share)) => - (u8_slice_as_os_str(server), u8_slice_as_os_str(share)), - None => (u8_slice_as_os_str(path), u8_slice_as_os_str(&[])), - }; - return Some(VerbatimUNC(server, share)); - } else { - // \\?\path - let idx = path.iter().position(|&b| b == b'\\'); - if idx == Some(2) && path[1] == b':' { - let c = path[0]; - if c.is_ascii() && (c as char).is_alphabetic() { - // \\?\C:\ path - return Some(VerbatimDisk(c.to_ascii_uppercase())); - } - } - let slice = &path[..idx.unwrap_or(path.len())]; - return Some(Verbatim(u8_slice_as_os_str(slice))); - } - } else if path.starts_with(b".\\") { + return parse_verbatim(path) + } else if path.starts_with(br".\") { // \\.\path path = &path[2..]; let pos = path.iter().position(|&b| b == b'\\'); @@ -88,8 +91,10 @@ pub fn parse_prefix<'a>(path: &'a OsStr) -> Option { if c.is_ascii() && (c as char).is_alphabetic() { return Some(Disk(c.to_ascii_uppercase())); } + } 
else if path.starts_with(br"\??\") { + return parse_verbatim(path) } - return None; + None } fn parse_two_comps(mut path: &[u8], f: fn(u8) -> bool) -> Option<(&[u8], &[u8])> {