diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index e3a464a1c51a9..432ee574145c7 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -160,6 +160,12 @@ pub trait Pattern<'a>: Sized { None } } + + /// Return the pattern as a fixed slice of UTF-8 bytes, if possible. + #[inline] + fn as_bytes(&self) -> Option<&[u8]> { + None + } } // Searcher @@ -917,6 +923,11 @@ where /// Delegates to the `&str` impl. impl<'a, 'b, 'c> Pattern<'a> for &'c &'b str { pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s); + + #[inline] + fn as_bytes(&self) -> Option<&[u8]> { + (*self).as_bytes() + } } ///////////////////////////////////////////////////////////////////////////// @@ -1001,6 +1012,11 @@ impl<'a, 'b> Pattern<'a> for &'b str { None } } + + #[inline] + fn as_bytes(&self) -> Option<&[u8]> { + Some(str::as_bytes(self)) + } } ///////////////////////////////////////////////////////////////////////////// diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index 5c0541d3caf33..f87ee88f645bc 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -8,6 +8,7 @@ use crate::fmt; use crate::hash::{Hash, Hasher}; use crate::ops; use crate::rc::Rc; +use crate::str::pattern::Pattern; use crate::str::FromStr; use crate::sync::Arc; @@ -978,6 +979,69 @@ impl OsStr { pub fn eq_ignore_ascii_case>(&self, other: S) -> bool { self.inner.eq_ignore_ascii_case(&other.as_ref().inner) } + + /// Returns `true` if the given pattern matches a prefix of this `OsStr`. + /// + /// Returns `false` if it does not. + /// + /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a + /// function or closure that determines if a character matches. + /// + /// [`char`]: prim@char + /// [pattern]: crate::str::pattern + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(osstr_str_prefix_fns)] + /// + /// use std::ffi::OsString; + /// + /// let bananas = OsString::from("bananas"); + /// + /// assert!(bananas.starts_with("bana")); + /// assert!(!bananas.starts_with("nana")); + /// ``` + #[unstable(feature = "osstr_str_prefix_fns", issue = "none")] + #[must_use] + #[inline] + pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool { + self.inner.starts_with(pattern) + } + + /// Returns this `OsStr` with the given prefix removed. + /// + /// If the `OsStr` starts with the pattern `prefix`, returns the substring + /// after the prefix, wrapped in `Some`. + /// + /// If the `OsStr` does not start with `prefix`, returns `None`. + /// + /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a + /// function or closure that determines if a character matches. + /// + /// [`char`]: prim@char + /// [pattern]: crate::str::pattern + /// + /// # Examples + /// + /// ``` + /// #![feature(osstr_str_prefix_fns)] + /// + /// use std::ffi::{OsStr, OsString}; + /// + /// let foobar = OsString::from("foo:bar"); + /// + /// assert_eq!(foobar.strip_prefix("foo:"), Some(OsStr::new("bar"))); + /// assert_eq!(foobar.strip_prefix("bar"), None); + /// ``` + #[unstable(feature = "osstr_str_prefix_fns", issue = "none")] + #[must_use] + #[inline] + pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a OsStr> { + Some(OsStr::from_inner(self.inner.strip_prefix(prefix)?)) + } } #[stable(feature = "box_from_os_str", since = "1.17.0")] diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 318a46d1b637e..4188c8c950bdb 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -264,6 +264,7 @@ #![feature(needs_panic_runtime)] #![feature(negative_impls)] #![feature(never_type)] +#![feature(pattern)] #![feature(platform_intrinsics)] #![feature(prelude_import)] #![feature(rustc_attrs)] diff --git a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs index 488217f39413f..3157ad1a33205 100644 --- a/library/std/src/sys/unix/os_str.rs +++ b/library/std/src/sys/unix/os_str.rs @@ -8,6 +8,7 @@ use crate::fmt::Write; use crate::mem; use crate::rc::Rc; use crate::str; +use crate::str::pattern::{Pattern, SearchStep, Searcher}; use crate::sync::Arc; use crate::sys_common::{AsInner, IntoInner}; @@ -270,4 +271,46 @@ impl Slice { pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { self.inner.eq_ignore_ascii_case(&other.inner) } + + fn to_str_prefix(&self) -> &str { + let utf8_err = match str::from_utf8(&self.inner) { + Ok(prefix) => return prefix, + Err(err) => err, + }; + let utf8_len = utf8_err.valid_up_to(); + if utf8_len == 0 { + return ""; + } + // SAFETY: `Utf8Error::valid_up_to()` returns an index up to which + // valid UTF-8 has been verified. + unsafe { str::from_utf8_unchecked(&self.inner[..utf8_len]) } + } + + #[inline] + pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool { + if let Some(pattern_bytes) = pattern.as_bytes() { + return self.inner.starts_with(pattern_bytes); + } + self.to_str_prefix().starts_with(pattern) + } + + pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> { + if let Some(prefix_bytes) = prefix.as_bytes() { + let suffix = self.inner.strip_prefix(prefix_bytes)?; + return Some(Slice::from_u8_slice(suffix)); + } + + let p = self.to_str_prefix(); + let prefix_len = match prefix.into_searcher(p).next() { + SearchStep::Match(0, prefix_len) => prefix_len, + _ => return None, + }; + + // SAFETY: `p` is guaranteed to be a prefix of `self.inner`, + // and `Searcher` is known to return valid indices. + unsafe { + let suffix = self.inner.get_unchecked(prefix_len..); + Some(Slice::from_u8_slice(suffix)) + } + } } diff --git a/library/std/src/sys/unix/os_str/tests.rs b/library/std/src/sys/unix/os_str/tests.rs index 22ba0c9235041..9c5a951771ad7 100644 --- a/library/std/src/sys/unix/os_str/tests.rs +++ b/library/std/src/sys/unix/os_str/tests.rs @@ -16,3 +16,37 @@ fn display() { Slice::from_u8_slice(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string(), ); } + +#[test] +fn slice_starts_with() { + let mut string = Buf::from_string(String::from("héllô=")); + string.push_slice(Slice::from_u8_slice(b"\xFF")); + string.push_slice(Slice::from_str("wørld")); + let slice = string.as_slice(); + + assert!(slice.starts_with('h')); + assert!(slice.starts_with("héllô")); + assert!(!slice.starts_with("héllô=wørld")); +} + +#[test] +fn slice_strip_prefix() { + let mut string = Buf::from_string(String::from("héllô=")); + string.push_slice(Slice::from_u8_slice(b"\xFF")); + string.push_slice(Slice::from_str("wørld")); + let slice = string.as_slice(); + + assert!(slice.strip_prefix("héllô=wørld").is_none()); + + { + let suffix = slice.strip_prefix('h'); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().inner, b"\xC3\xA9ll\xC3\xB4=\xFFw\xC3\xB8rld",); + } + + { + let suffix = slice.strip_prefix("héllô"); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().inner, b"=\xFFw\xC3\xB8rld"); + } +} diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs index 2f2b0e56e0889..f7ad56a5987d3 100644 --- a/library/std/src/sys/windows/os_str.rs +++ b/library/std/src/sys/windows/os_str.rs @@ -5,6 +5,7 @@ use crate::collections::TryReserveError; use crate::fmt; use crate::mem; use crate::rc::Rc; +use crate::str::pattern::Pattern; use crate::sync::Arc; use crate::sys_common::wtf8::{Wtf8, Wtf8Buf}; use crate::sys_common::{AsInner, FromInner, IntoInner}; @@ -156,6 +157,13 @@ impl Slice { unsafe { mem::transmute(Wtf8::from_str(s)) } } + #[inline] + fn from_inner(inner: &Wtf8) -> &Slice { + // SAFETY: Slice is just a wrapper of Wtf8, + // therefore converting &Wtf8 to &Slice is safe. + unsafe { &*(inner as *const Wtf8 as *const Slice) } + } + pub fn to_str(&self) -> Option<&str> { self.inner.as_str() } @@ -222,4 +230,14 @@ impl Slice { pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { self.inner.eq_ignore_ascii_case(&other.inner) } + + #[inline] + pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool { + self.inner.starts_with(pattern) + } + + #[inline] + pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> { + Some(Slice::from_inner(self.inner.strip_prefix(prefix)?)) + } } diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index ff96c35fb0ba6..043e9d8873536 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -31,6 +31,7 @@ use crate::ops; use crate::rc::Rc; use crate::slice; use crate::str; +use crate::str::pattern::{Pattern, SearchStep, Searcher}; use crate::sync::Arc; use crate::sys_common::AsInner; @@ -781,6 +782,52 @@ impl Wtf8 { pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { self.bytes.eq_ignore_ascii_case(&other.bytes) } + + fn to_str_prefix(&self) -> &str { + let utf8_bytes = match self.next_surrogate(0) { + None => &self.bytes, + Some((0, _)) => b"", + Some((surrogate_pos, _)) => { + let (utf8_bytes, _) = self.bytes.split_at(surrogate_pos); + utf8_bytes + } + }; + + // SAFETY: `utf8_bytes` is a prefix of a WTF-8 value that contains no + // surrogates, and well-formed WTF-8 that contains no surrogates is + // also well-formed UTF-8. + unsafe { str::from_utf8_unchecked(utf8_bytes) } + } + + #[inline] + pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool { + if let Some(pattern_bytes) = pattern.as_bytes() { + return self.bytes.starts_with(pattern_bytes); + } + self.to_str_prefix().starts_with(pattern) + } + + pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Wtf8> { + if let Some(prefix_bytes) = prefix.as_bytes() { + let suffix = self.bytes.strip_prefix(prefix_bytes)?; + // SAFETY: WTF-8 is a superset of UTF-8, so stripping off a UTF-8 + // prefix will yield a suffix that is valid WTF-8. + return unsafe { Some(Wtf8::from_bytes_unchecked(suffix)) }; + } + + let p = self.to_str_prefix(); + let prefix_len = match prefix.into_searcher(p).next() { + SearchStep::Match(0, prefix_len) => prefix_len, + _ => return None, + }; + + // SAFETY: `p` is guaranteed to be a prefix of `self.bytes`, + // and `Searcher` is known to return valid indices. + unsafe { + let suffix = self.bytes.get_unchecked(prefix_len..); + Some(Wtf8::from_bytes_unchecked(suffix)) + } + } } /// Returns a slice of the given string for the byte range \[`begin`..`end`). diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 1a302d646941b..6cef0548d70a7 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -664,3 +664,37 @@ fn wtf8_to_owned() { assert_eq!(string.bytes, b"\xED\xA0\x80"); assert!(!string.is_known_utf8); } + +#[test] +fn wtf8_starts_with() { + let mut string = Wtf8Buf::from_str("héllô="); + string.push(CodePoint::from_u32(0xD800).unwrap()); + string.push_str("wørld"); + let slice = string.as_slice(); + + assert!(slice.starts_with('h')); + assert!(slice.starts_with("héllô")); + assert!(!slice.starts_with("héllô=wørld")); +} + +#[test] +fn wtf8_strip_prefix() { + let mut string = Wtf8Buf::from_str("héllô="); + string.push(CodePoint::from_u32(0xD800).unwrap()); + string.push_str("wørld"); + let slice = string.as_slice(); + + assert!(slice.strip_prefix("héllô=wørld").is_none()); + + { + let suffix = slice.strip_prefix('h'); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().bytes, b"\xC3\xA9ll\xC3\xB4=\xED\xA0\x80w\xC3\xB8rld",); + } + + { + let suffix = slice.strip_prefix("héllô"); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().bytes, b"=\xED\xA0\x80w\xC3\xB8rld"); + } +}