From 71b64e677e548dd81ef250fec3c673bf4ecf0d80 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 17:29:36 +0200 Subject: [PATCH 01/21] Change constant from `cfg`ed literal into usize-based const This also fixes missing docs on 32-bit platforms and allows using this crate for platforms other than 32- and 64-bit. --- src/inline_string.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/inline_string.rs b/src/inline_string.rs index 7d4cf8d..a9ddb8d 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -47,10 +47,10 @@ use std::str; /// /// Sometime in the future, when Rust's generics support specializing with /// compile-time static integers, this number should become configurable. -#[cfg(target_pointer_width = "64")] -pub const INLINE_STRING_CAPACITY: usize = 30; -#[cfg(target_pointer_width = "32")] -pub const INLINE_STRING_CAPACITY: usize = 14; +pub const INLINE_STRING_CAPACITY: usize = { + use mem::size_of; + size_of::() + size_of::() - 2 +}; /// A short UTF-8 string that uses inline storage and does no heap allocation. /// From 39a0beccd00ddb422f7454bacadef46efe3ed604 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 18:09:55 +0200 Subject: [PATCH 02/21] Sanity changes to O(1) from O(n) Strictly speaking, `truncate` not throwing panic with index >= length is a breaking change, but that was not very String-like anyway. --- src/inline_string.rs | 52 ++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/src/inline_string.rs b/src/inline_string.rs index a9ddb8d..cba1642 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -416,8 +416,9 @@ impl InlineString { /// /// # Panics /// - /// Panics if `new_len` > current length, or if `new_len` is not a character - /// boundary. + /// Panics if `new_len` does not lie on a [`char`] boundary. + /// + /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html /// /// # Examples /// @@ -432,15 +433,11 @@ impl InlineString { pub fn truncate(&mut self, new_len: usize) { self.assert_sanity(); - assert!( - self.char_indices().any(|(i, _)| i == new_len), - "inlinable_string::InlineString::truncate: new_len is not a character - boundary" - ); - assert!(new_len <= self.len()); + if new_len < self.len() { + assert!(self[..].is_char_boundary(new_len)); - self.length = new_len as u8; - self.assert_sanity(); + self.length = new_len as u8; + } } /// Removes the last character from the string buffer and returns it. @@ -476,8 +473,10 @@ impl InlineString { /// /// # Panics /// - /// If `idx` does not lie on a character boundary, or if it is out of - /// bounds, then this function will panic. + /// Panics if `idx` is larger than or equal to the `String`'s length, + /// or if it does not lie on a [`char`] boundary. + /// + /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html /// /// # Examples /// @@ -492,29 +491,16 @@ impl InlineString { #[inline] pub fn remove(&mut self, idx: usize) -> char { self.assert_sanity(); - assert!(idx <= self.len()); - match self.char_indices().find(|&(i, _)| i == idx) { - None => panic!( - "inlinable_string::InlineString::remove: idx does not lie on a - character boundary" - ), - Some((_, ch)) => { - let char_len = ch.len_utf8(); - let next = idx + char_len; - - unsafe { - ptr::copy( - self.bytes.as_ptr().add(next), - self.bytes.as_mut_ptr().add(idx), - self.len() - next, - ); - } - self.length -= char_len as u8; + let ch = match self[idx..].chars().next() { + Some(ch) => ch, + None => panic!("cannot remove a char from the end of a string"), + }; - self.assert_sanity(); - ch - } + self.bytes.copy_within(idx + ch.len_utf8().., idx); + + ch + } } } From dfad7d7f2c97dd84ee2105276540f93dad70b732 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 18:27:17 +0200 Subject: [PATCH 03/21] Three unsafe helper functions --- src/inline_string.rs | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/inline_string.rs b/src/inline_string.rs index cba1642..36be87f 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -286,6 +286,55 @@ impl InlineString { ); } + /// Turn a string slice into `InlineString` without checks. + /// + /// # Safety: + /// + /// It is instant UB if the length of `s` is bigger than `INLINE_STRING_CAPACITY`. + unsafe fn from_str_unchecked(s: &str) -> Self { + let string_len = s.len(); + debug_assert!( + string_len <= INLINE_STRING_CAPACITY as usize, + "inlinable_string: internal error: length greater than capacity" + ); + + let mut ss = InlineString::new(); + unsafe { + ptr::copy_nonoverlapping(s.as_ptr(), ss.bytes.as_mut_ptr(), string_len); + } + ss.length = string_len as u8; + + ss.assert_sanity(); + + ss + } + + /// Returns a mutable reference to the inner buffer. + /// + /// Safety + /// + /// The same as [`str::as_bytes_mut()`]. + /// + ///[`str::as_bytes_mut()`]: https://doc.rust-lang.org/std/primitive.str.html#method.as_bytes_mut + #[inline] + pub(crate) unsafe fn as_bytes_mut(&mut self) -> &mut [u8; INLINE_STRING_CAPACITY] { + &mut self.bytes + } + + /// Insanely unsafe function to set length. + /// + /// Safety + /// + /// It's UB if `new_len` + /// + /// * is bigger than `INLINE_STRING_CAPACITY`; + /// * doesn't lie at the start and/or end of a UTF-8 code point sequence; + /// * grabs some uninitialized memory. + #[inline] + pub(crate) unsafe fn set_len(&mut self, new_len: usize) { + self.length = new_len as u8 + } + /// Creates a new string buffer initialized with the empty string. /// /// # Examples From 6cdca6621de24f9137077d6b8a4cbb721a307d55 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 18:30:46 +0200 Subject: [PATCH 04/21] Added `BorrowMut` impl for `InlinableString` --- src/lib.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 6b864db..53f5c72 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -205,6 +205,12 @@ impl Borrow for InlinableString { } } +impl BorrowMut for InlinableString { + fn borrow_mut(&mut self) -> &mut str { + &mut *self + } +} + impl AsRef for InlinableString { fn as_ref(&self) -> &str { match *self { From 90f050153905038c675480febe0531311286cc01 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 18:33:35 +0200 Subject: [PATCH 05/21] Utility change in tests; nothing really changed --- src/lib.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 53f5c72..f37dcdf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -652,6 +652,15 @@ mod tests { use std::cmp::Ordering; use std::iter::FromIterator; + const LONG_STR: &str = "this is a really long string that is much larger than + INLINE_STRING_CAPACITY and so cannot be stored inline."; + + #[test] + fn test_long_string() { + // If this fails, increase the size of the long string. + assert!(LONG_STR.len() > INLINE_STRING_CAPACITY); + } + #[test] fn test_size() { use std::mem::size_of; @@ -667,10 +676,8 @@ mod tests { s.push_str("small"); assert_eq!(s, "small"); - let long_str = "this is a really long string that is much larger than - INLINE_STRING_CAPACITY and so cannot be stored inline."; - s.push_str(long_str); - assert_eq!(s, String::from("small") + long_str); + s.push_str(LONG_STR); + assert_eq!(s, String::from("small") + LONG_STR); } #[test] @@ -680,10 +687,8 @@ mod tests { write!(&mut s, "small").expect("!write"); assert_eq!(s, "small"); - let long_str = "this is a really long string that is much larger than - INLINE_STRING_CAPACITY and so cannot be stored inline."; - write!(&mut s, "{}", long_str).expect("!write"); - assert_eq!(s, String::from("small") + long_str); + write!(&mut s, "{}", LONG_STR).expect("!write"); + assert_eq!(s, String::from("small") + LONG_STR); } #[test] From 9288324d03743b8477b15d6d2a5686f1f1e8d9ff Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 18:45:26 +0200 Subject: [PATCH 06/21] Provided some of the previous methods of the `StringExt` --- src/string_ext.rs | 406 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 370 insertions(+), 36 deletions(-) diff --git a/src/string_ext.rs b/src/string_ext.rs index 3a6532d..a49475b 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -37,9 +37,10 @@ pub trait StringExt<'a>: /// /// let s = InlinableString::new(); /// ``` - fn new() -> Self - where - Self: Sized; + #[inline] + fn new() -> Self { + Self::with_capacity(0) + } /// Creates a new string buffer with the given capacity. The string will be /// able to hold at least `capacity` bytes without reallocating. If @@ -79,9 +80,10 @@ pub trait StringExt<'a>: /// let err = s.utf8_error(); /// assert_eq!(s.into_bytes(), [240, 144, 128]); /// ``` - fn from_utf8(vec: Vec) -> Result - where - Self: Sized; + #[inline] + fn from_utf8(vec: Vec) -> Result { + String::from_utf8(vec).map(from_string) + } /// Converts a vector of bytes to a new UTF-8 string. /// Any invalid UTF-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER. @@ -95,10 +97,8 @@ pub trait StringExt<'a>: /// let output = InlinableString::from_utf8_lossy(input); /// assert_eq!(output, "Hello \u{FFFD}World"); /// ``` - fn from_utf8_lossy(v: &'a [u8]) -> Cow<'a, str> - where - Self: Sized, - { + #[inline] + fn from_utf8_lossy(v: &[u8]) -> Cow { String::from_utf8_lossy(v) } @@ -120,9 +120,10 @@ pub trait StringExt<'a>: /// v[4] = 0xD800; /// assert!(InlinableString::from_utf16(v).is_err()); /// ``` - fn from_utf16(v: &[u16]) -> Result - where - Self: Sized; + #[inline] + fn from_utf16(v: &[u16]) -> Result { + String::from_utf16(v).map(from_string) + } /// Decode a UTF-16 encoded vector `v` into a string, replacing /// invalid data with the replacement character (U+FFFD). @@ -140,25 +141,30 @@ pub trait StringExt<'a>: /// assert_eq!(InlinableString::from_utf16_lossy(v), /// InlinableString::from("𝄞mus\u{FFFD}ic\u{FFFD}")); /// ``` - fn from_utf16_lossy(v: &[u16]) -> Self - where - Self: Sized; + #[inline] + fn from_utf16_lossy(v: &[u16]) -> Self { + from_string(String::from_utf16_lossy(v)) + } - /// Creates a new `InlinableString` from a length, capacity, and pointer. + /// Creates a new string from a length, capacity, and pointer. /// /// # Safety /// - /// This is _very_ unsafe because: + /// This function is just a shortened call to two other unsafe functions, + /// therefore it inherits all unsafety of those: /// - /// * We call `String::from_raw_parts` to get a `Vec`. Therefore, this - /// function inherits all of its unsafety, see [its - /// documentation](https://doc.rust-lang.org/nightly/collections/vec/struct.Vec.html#method.from_raw_parts) - /// for the invariants it expects, they also apply to this function. + /// * First, [`Vec::from_raw_parts`] is called onto arguments; + /// see the method documentation for the invariants it expects. /// - /// * We assume that the `Vec` contains valid UTF-8. - unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> Self - where - Self: Sized; + /// * Then [`StringExt::from_utf8_unchecked`] is called onto the given vector, + /// thus the vector must hold valid UTF-8 encoded string. + /// + /// [`Vec::from_raw_parts`]: https://doc.rust-lang.org/std/vec/struct.Vec.html#method.from_raw_parts + /// [`StringExt::from_utf8_unchecked`]: #tymethod.from_utf8_unchecked + #[inline] + unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> Self { + Self::from_utf8_unchecked(Vec::from_raw_parts(buf, length, capacity)) + } /// Converts a vector of bytes to a new `InlinableString` without checking /// if it contains valid UTF-8. @@ -167,9 +173,7 @@ pub trait StringExt<'a>: /// /// This is unsafe because it assumes that the UTF-8-ness of the vector has /// already been validated. - unsafe fn from_utf8_unchecked(bytes: Vec) -> Self - where - Self: Sized; + unsafe fn from_utf8_unchecked(bytes: Vec) -> Self; /// Returns the underlying byte buffer, encoded as UTF-8. /// @@ -182,7 +186,13 @@ pub trait StringExt<'a>: /// let bytes = s.into_bytes(); /// assert_eq!(bytes, [104, 101, 108, 108, 111]); /// ``` - fn into_bytes(self) -> Vec; + #[inline] + fn into_bytes(self) -> Vec + where + Self: Into, + { + Into::into(self).into_bytes() + } /// Pushes the given string onto this string buffer. /// @@ -195,7 +205,11 @@ pub trait StringExt<'a>: /// s.push_str("bar"); /// assert_eq!(s, "foobar"); /// ``` - fn push_str(&mut self, string: &str); + #[inline] + fn push_str(&mut self, string: &str) { + let len = self.len(); + self.insert_str(len, string); + } /// Returns the number of bytes that this string buffer can hold without /// reallocating. @@ -282,7 +296,11 @@ pub trait StringExt<'a>: /// s.push('3'); /// assert_eq!(s, "abc123"); /// ``` - fn push(&mut self, ch: char); + #[inline] + fn push(&mut self, ch: char) { + let len = self.len(); + self.insert(len, ch); + } /// Works with the underlying buffer as a byte slice. /// @@ -294,14 +312,23 @@ pub trait StringExt<'a>: /// let s = InlinableString::from("hello"); /// assert_eq!(s.as_bytes(), [104, 101, 108, 108, 111]); /// ``` - fn as_bytes(&self) -> &[u8]; + #[inline] + fn as_bytes(&self) -> &[u8] + where + Self: Borrow, + { + self.borrow().as_bytes() + } /// Shortens a string to the specified length. /// /// # Panics /// - /// Panics if `new_len` > current length, or if `new_len` is not a character - /// boundary. + /// Panics if `new_len` does not lie on a [`char`] boundary. + /// + /// For other possible panic conditions, read documentation of the given implementation. + /// + /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html /// /// # Examples /// @@ -398,7 +425,13 @@ pub trait StringExt<'a>: /// } /// assert_eq!(s, "olleh"); /// ``` - unsafe fn as_mut_slice(&mut self) -> &mut [u8]; + #[inline] + unsafe fn as_mut_slice(&mut self) -> &mut [u8] + where + Self: BorrowMut, + { + self.borrow_mut().as_bytes_mut() + } /// Returns the number of bytes in this string. /// @@ -444,6 +477,58 @@ pub trait StringExt<'a>: fn clear(&mut self) { self.truncate(0); } + + /// Extracts a string slice containing the entire string buffer. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use inlinable_string::{InlinableString, StringExt}; + /// + /// let s = InlinableString::from("foo"); + /// + /// assert_eq!("foo", s.as_str()); + /// ``` + #[inline] + fn as_str(&self) -> &str + where + Self: Borrow, + { + self.borrow() + } + + /// Converts this extandable string into a mutable string slice. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use inlinable_string::{InlinableString, StringExt}; + /// + /// let mut s = InlinableString::from("foobar"); + /// let s_mut_str = s.as_mut_str(); + /// + /// s_mut_str.make_ascii_uppercase(); + /// + /// assert_eq!("FOOBAR", s_mut_str); + /// ``` + #[inline] + fn as_mut_str(&mut self) -> &mut str + where + Self: BorrowMut, + { + self.borrow_mut() + } + +/// Internal function to decrease the numbers of unsafe. +#[inline] +fn from_string(s: String) -> S { + // SAFETY: + // `s` is a well-formed string, turned into bytes. + unsafe { S::from_utf8_unchecked(::into_bytes(s)) } } impl<'a> StringExt<'a> for String { @@ -553,6 +638,255 @@ impl<'a> StringExt<'a> for String { } } +#[cfg(test)] +mod provided_methods_tests { + + use super::StringExt; + use std::{ + borrow::{Borrow, BorrowMut, Cow}, + cmp::PartialEq, + fmt, + ops::{Deref, DerefMut}, + }; + + #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] + struct ReqImpl(String); + + impl From for String { + fn from(s: ReqImpl) -> Self { + s.0 + } + } + + impl From<&str> for ReqImpl { + fn from(s: &str) -> Self { + Self(String::from(s)) + } + } + impl Deref for ReqImpl { + type Target = str; + fn deref(&self) -> &Self::Target { + &self.0 + } + } + impl DerefMut for ReqImpl { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } + } + impl fmt::Display for ReqImpl { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } + } + impl PartialEq for ReqImpl { + fn eq(&self, other: &str) -> bool { + self.0.eq(other) + } + } + impl PartialEq for ReqImpl { + fn eq(&self, other: &String) -> bool { + self.0.eq(other) + } + } + impl PartialEq<&str> for ReqImpl { + fn eq(&self, other: &&str) -> bool { + self.0.eq(other) + } + } + impl PartialEq> for ReqImpl { + fn eq(&self, other: &Cow) -> bool { + self.0.eq(other) + } + } + impl Borrow for ReqImpl { + fn borrow(&self) -> &str { + &self.0 + } + } + impl BorrowMut for ReqImpl { + fn borrow_mut(&mut self) -> &mut str { + &mut self.0 + } + } + + impl StringExt for ReqImpl { + fn with_capacity(capacity: usize) -> Self { + Self(String::with_capacity(capacity)) + } + unsafe fn from_utf8_unchecked(bytes: Vec) -> Self { + Self(String::from_utf8_unchecked(bytes)) + } + fn capacity(&self) -> usize { + self.0.capacity() + } + fn reserve(&mut self, additional: usize) { + self.0.reserve(additional) + } + fn reserve_exact(&mut self, additional: usize) { + self.0.reserve_exact(additional) + } + fn shrink_to_fit(&mut self) { + self.0.shrink_to_fit() + } + fn truncate(&mut self, new_len: usize) { + self.0.truncate(new_len) + } + fn pop(&mut self) -> Option { + self.0.pop() + } + fn remove(&mut self, idx: usize) -> char { + self.0.remove(idx) + } + fn insert(&mut self, idx: usize, ch: char) { + self.0.insert(idx, ch) + } + fn len(&self) -> usize { + self.0.len() + } + } + + #[test] + fn test_as_bytes() { + let s = ReqImpl::from("hello"); + assert_eq!(s.as_bytes(), [104, 101, 108, 108, 111]); + } + + #[test] + fn test_as_mut_slice() { + let mut s = ReqImpl::from("hello"); + unsafe { + let slice = s.as_mut_slice(); + assert!(slice == &[104, 101, 108, 108, 111]); + slice.reverse(); + } + assert_eq!(s, "olleh"); + } + + #[test] + fn test_as_mut_str() { + let mut s = ReqImpl::from("foobar"); + let s_mut_str = s.as_mut_str(); + + s_mut_str.make_ascii_uppercase(); + + assert_eq!("FOOBAR", s_mut_str); + } + + #[test] + fn test_as_str() { + let s = ReqImpl::from("foo"); + + assert_eq!("foo", s.as_str()); + } + + #[test] + fn test_clear() { + let mut s = ReqImpl::from("foo"); + s.clear(); + assert!(s.is_empty()); + } + + #[test] + fn test_from_raw_parts() { + use std::mem; + + unsafe { + let s = ReqImpl::from("hello"); + + let mut s = mem::ManuallyDrop::new(s); + + let ptr = s.0.as_mut_ptr(); + let len = s.len(); + let capacity = s.capacity(); + + let s = ReqImpl::from_raw_parts(ptr, len, capacity); + + assert_eq!(s, "hello"); + } + } + + #[test] + fn test_from_utf16() { + // 𝄞music + let v = &mut [0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063]; + assert_eq!(ReqImpl::from_utf16(v).unwrap(), ReqImpl::from("𝄞music")); + + // 𝄞muic + v[4] = 0xD800; + assert!(ReqImpl::from_utf16(v).is_err()); + } + + #[test] + fn test_from_utf16_lossy() { + // 𝄞music + let v = &[ + 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, + ]; + + assert_eq!( + ReqImpl::from_utf16_lossy(v), + ReqImpl::from("𝄞mus\u{FFFD}ic\u{FFFD}") + ); + } + + #[test] + fn test_from_utf8() { + let hello_vec = vec![104, 101, 108, 108, 111]; + let s = ReqImpl::from_utf8(hello_vec).unwrap(); + assert_eq!(s, "hello"); + + let invalid_vec = vec![240, 144, 128]; + let s = ReqImpl::from_utf8(invalid_vec).err().unwrap(); + let _err = s.utf8_error(); + assert_eq!(s.into_bytes(), [240, 144, 128]); + } + + #[test] + fn test_from_utf8_lossy() { + let input = b"Hello \xF0\x90\x80World"; + let output = ReqImpl::from_utf8_lossy(input); + assert_eq!(output, "Hello \u{FFFD}World"); + } + + #[test] + fn test_into_bytes() { + let s = ReqImpl::from("hello"); + let bytes = s.into_bytes(); + assert_eq!(bytes, [104, 101, 108, 108, 111]); + } + + #[test] + fn test_is_empty() { + let mut v = ReqImpl::new(); + assert!(v.is_empty()); + v.push('a'); + assert!(!v.is_empty()); + } + + #[test] + fn test_new() { + let s = ReqImpl::new(); + assert_eq!(ReqImpl::with_capacity(0), s); + } + + #[test] + fn test_push() { + let mut s = ReqImpl::from("abc"); + s.push('1'); + s.push('2'); + s.push('3'); + assert_eq!(s, "abc123"); + } + + #[test] + fn test_push_str() { + let mut s = ReqImpl::from("foo"); + s.push_str("bar"); + assert_eq!(s, "foobar"); + } +} + #[cfg(test)] mod std_string_stringext_sanity_tests { // Sanity tests for std::string::String's StringExt implementation. From 4ebaa6cdb58127484a67a8f96132359119470368 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 18:47:03 +0200 Subject: [PATCH 07/21] Breaking change in the trait definition Removed `Borrow` requirement, so non-linear strings could implement this trait now. Removed trait-bound lifetime. Added `Sized` requirement to the trait itself, due to `with_capacity` being a required method (it can't be implemented for unsized structs). --- src/string_ext.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/string_ext.rs b/src/string_ext.rs index a49475b..9a2624b 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -11,22 +11,25 @@ //! //! See the [crate level documentation](./../index.html) for more. -use std::borrow::{Borrow, Cow}; +use std::borrow::{Borrow, BorrowMut, Cow}; use std::cmp::PartialEq; use std::fmt::Display; +use std::ops::RangeBounds; +use std::str; use std::string::{FromUtf16Error, FromUtf8Error}; /// A trait that exists to abstract string operations over any number of /// concrete string type implementations. /// /// See the [crate level documentation](./../index.html) for more. -pub trait StringExt<'a>: - Borrow - + Display - + PartialEq - + PartialEq<&'a str> - + PartialEq - + PartialEq> +pub trait StringExt +where + for<'a> Self: Sized + + Display + + PartialEq + + PartialEq + + PartialEq<&'a str> + + PartialEq>, { /// Creates a new string buffer initialized with the empty string. /// From e973ef13a628ee0ccd4608d5a67bb0936f869c45 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 18:54:36 +0200 Subject: [PATCH 08/21] Breaking change Implement `TryFrom<&str>` instead of `From<&str>` for `InlineString`. Panics in `From` are bad. Really. Just read the trait docs: > Note: This trait must not fail. If the conversion can fail, use TryFrom. Plus, `TryFrom` allows for some nice safe capacity checks (the next commit will show all non-doc changes due to `TryFrom`). --- src/inline_string.rs | 82 ++++++++++++++++++++++++++++---------------- src/lib.rs | 5 +-- 2 files changed, 55 insertions(+), 32 deletions(-) diff --git a/src/inline_string.rs b/src/inline_string.rs index 36be87f..ab33755 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -35,10 +35,12 @@ //! ``` use std::borrow; -use std::fmt; +use std::convert::{Infallible, TryFrom}; +use std::fmt::{self, Display}; use std::hash; use std::io::Write; -use std::ops; +use std::mem; +use std::ops::{self, RangeBounds}; use std::ptr; use std::str; @@ -97,25 +99,34 @@ impl AsMut<[u8]> for InlineString { } } -/// Create a `InlineString` from the given `&str`. -/// -/// # Panics -/// -/// If the given string's size is greater than `INLINE_STRING_CAPACITY`, this -/// method panics. -impl<'a> From<&'a str> for InlineString { - fn from(string: &'a str) -> InlineString { - let string_len = string.len(); - assert!(string_len <= INLINE_STRING_CAPACITY); +/// An error type for `InlineString` TryFrom impl. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct NotEnoughCapacityError; +impl Display for NotEnoughCapacityError { + #[inline] + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + "the length of the string is bigger than maximum capacity of `InlineString`".fmt(fmt) + } +} +impl From for NotEnoughCapacityError { + #[inline] + fn from(x: Infallible) -> NotEnoughCapacityError { + match x {} + } +} - let mut ss = InlineString::new(); - unsafe { - ptr::copy_nonoverlapping(string.as_ptr(), ss.bytes.as_mut_ptr(), string_len); - } - ss.length = string_len as u8; +impl TryFrom<&str> for InlineString { + type Error = NotEnoughCapacityError; - ss.assert_sanity(); - ss + fn try_from(string: &str) -> Result { + let string_len = string.len(); + if string_len <= INLINE_STRING_CAPACITY { + // SAFETY: + // `string_len` is not bigger than capacity. + unsafe { Ok(Self::from_str_unchecked(string)) } + } else { + Err(NotEnoughCapacityError) + } } } @@ -358,9 +369,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let s = InlineString::from("hello"); + /// let s = InlineString::try_from("hello").unwrap(); /// let bytes = s.into_bytes(); /// assert_eq!(&bytes[0..5], [104, 101, 108, 108, 111]); /// ``` @@ -378,9 +390,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let mut s = InlineString::from("foo"); + /// let mut s = InlineString::try_from("foo").unwrap(); /// s.push_str("bar"); /// assert_eq!(s, "foobar"); /// ``` @@ -413,9 +426,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let mut s = InlineString::from("abc"); + /// let mut s = InlineString::try_from("abc").unwrap(); /// s.push('1'); /// s.push('2'); /// s.push('3'); @@ -450,9 +464,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let s = InlineString::from("hello"); + /// let s = InlineString::try_from("hello").unwrap(); /// assert_eq!(s.as_bytes(), [104, 101, 108, 108, 111]); /// ``` #[inline] @@ -472,9 +487,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let mut s = InlineString::from("hello"); + /// let mut s = InlineString::try_from("hello").unwrap(); /// s.truncate(2); /// assert_eq!(s, "he"); /// ``` @@ -495,9 +511,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let mut s = InlineString::from("foo"); + /// let mut s = InlineString::try_from("foo").unwrap(); /// assert_eq!(s.pop(), Some('o')); /// assert_eq!(s.pop(), Some('o')); /// assert_eq!(s.pop(), Some('f')); @@ -530,9 +547,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let mut s = InlineString::from("foo"); + /// let mut s = InlineString::try_from("foo").unwrap(); /// assert_eq!(s.remove(0), 'f'); /// assert_eq!(s.remove(1), 'o'); /// assert_eq!(s.remove(0), 'o'); @@ -558,9 +576,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let mut s = InlineString::from("foo"); + /// let mut s = InlineString::try_from("foo").unwrap(); /// s.insert(2, 'f'); /// assert!(s == "fofo"); /// ``` @@ -609,9 +628,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let mut s = InlineString::from("hello"); + /// let mut s = InlineString::try_from("hello").unwrap(); /// unsafe { /// let slice = s.as_mut_slice(); /// assert!(slice == &[104, 101, 108, 108, 111]); @@ -630,9 +650,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let a = InlineString::from("foo"); + /// let a = InlineString::try_from("foo").unwrap(); /// assert_eq!(a.len(), 3); /// ``` #[inline] @@ -664,9 +685,10 @@ impl InlineString { /// # Examples /// /// ``` + /// use std::convert::TryFrom; /// use inlinable_string::InlineString; /// - /// let mut s = InlineString::from("foo"); + /// let mut s = InlineString::try_from("foo").unwrap(); /// s.clear(); /// assert!(s.is_empty()); /// ``` diff --git a/src/lib.rs b/src/lib.rs index f37dcdf..1dcb17e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -98,13 +98,14 @@ pub mod string_ext; pub use inline_string::{InlineString, INLINE_STRING_CAPACITY}; pub use string_ext::StringExt; -use std::borrow::{Borrow, Cow}; +use std::borrow::{Borrow, BorrowMut, Cow}; use std::cmp::Ordering; +use std::convert::TryFrom; use std::fmt; use std::hash; use std::iter; use std::mem; -use std::ops; +use std::ops::{self, RangeBounds}; use std::string::{FromUtf16Error, FromUtf8Error}; /// An owned, grow-able UTF-8 string that allocates short strings inline on the From 2285d03d683ab14a8433febb8dfc2b961aa38da1 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 19:01:08 +0200 Subject: [PATCH 09/21] TryFrom followings and some missed `StringExt` changes --- src/lib.rs | 47 +++++++++++++++++++++++------------------------ src/string_ext.rs | 6 ++---- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1dcb17e..08e235c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -230,13 +230,12 @@ impl AsMut for InlinableString { } } -impl<'a> From<&'a str> for InlinableString { +impl From<&str> for InlinableString { #[inline] - fn from(string: &'a str) -> InlinableString { - if string.len() <= INLINE_STRING_CAPACITY { - InlinableString::Inline(string.into()) - } else { - InlinableString::Heap(string.into()) + fn from(string: &str) -> InlinableString { + match InlineString::try_from(string) { + Ok(s) => InlinableString::Inline(s), + Err(_) => InlinableString::Heap(String::from(string)), } } } @@ -244,10 +243,9 @@ impl<'a> From<&'a str> for InlinableString { impl From for InlinableString { #[inline] fn from(string: String) -> InlinableString { - if string.len() <= INLINE_STRING_CAPACITY { - InlinableString::Inline(string.as_str().into()) - } else { - InlinableString::Heap(string) + match InlineString::try_from(string.as_str()) { + Ok(s) => InlinableString::Inline(s), + Err(_) => InlinableString::Heap(string), } } } @@ -419,7 +417,7 @@ impl_eq! { InlinableString, &'a str } impl_eq! { InlinableString, InlineString } impl_eq! { Cow<'a, str>, InlinableString } -impl<'a> StringExt<'a> for InlinableString { +impl StringExt for InlinableString { #[inline] fn new() -> Self { InlinableString::Inline(InlineString::new()) @@ -537,20 +535,21 @@ impl<'a> StringExt<'a> for InlinableString { #[inline] fn shrink_to_fit(&mut self) { - if self.len() <= INLINE_STRING_CAPACITY { - let demoted = if let InlinableString::Heap(ref s) = *self { - InlineString::from(&s[..]) - } else { - return; - }; - mem::swap(self, &mut InlinableString::Inline(demoted)); - return; - } - - match *self { - InlinableString::Heap(ref mut s) => s.shrink_to_fit(), - _ => panic!("inlinable_string: internal error: this branch should be unreachable"), + let inlined = match *self { + InlinableString::Heap(ref mut s) => match InlineString::try_from(s.as_str()) { + Ok(inlined) => Some(inlined), + Err(_) => { + s.shrink_to_fit(); + None + } + }, + // If already inlined, capacity can't be reduced. + _ => None, }; + + if let Some(inl) = inlined { + *self = InlinableString::Inline(inl); + } } #[inline] diff --git a/src/string_ext.rs b/src/string_ext.rs index 9a2624b..5d40675 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -57,9 +57,7 @@ where /// /// let s = InlinableString::with_capacity(10); /// ``` - fn with_capacity(capacity: usize) -> Self - where - Self: Sized; + fn with_capacity(capacity: usize) -> Self; /// Returns the vector as a string buffer, if possible, taking care not to /// copy it. @@ -534,7 +532,7 @@ fn from_string(s: String) -> S { unsafe { S::from_utf8_unchecked(::into_bytes(s)) } } -impl<'a> StringExt<'a> for String { +impl StringExt for String { #[inline] fn new() -> Self { String::new() From dc066b5c72e7afa296aff24ba7a2ae0a7212f9b7 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 20:25:00 +0200 Subject: [PATCH 10/21] Unified capacity error, fixed `insert`, added insert test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rly, mine error is better — it derives more traits and also implements `Display` and `From`. --- src/inline_string.rs | 172 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 145 insertions(+), 27 deletions(-) diff --git a/src/inline_string.rs b/src/inline_string.rs index ab33755..e72b348 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -63,11 +63,6 @@ pub struct InlineString { bytes: [u8; INLINE_STRING_CAPACITY], } -/// The error returned when there is not enough space in a `InlineString` for the -/// requested operation. -#[derive(Debug, PartialEq)] -pub struct NotEnoughSpaceError; - impl AsRef for InlineString { fn as_ref(&self) -> &str { self.assert_sanity(); @@ -99,33 +94,33 @@ impl AsMut<[u8]> for InlineString { } } -/// An error type for `InlineString` TryFrom impl. +/// An error type for `InlineString`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct NotEnoughCapacityError; -impl Display for NotEnoughCapacityError { +pub struct NotEnoughCapacity; +impl Display for NotEnoughCapacity { #[inline] fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - "the length of the string is bigger than maximum capacity of `InlineString`".fmt(fmt) + "the length of the result string is bigger than maximum capacity of `InlineString`".fmt(fmt) } } -impl From for NotEnoughCapacityError { +impl From for NotEnoughCapacity { #[inline] - fn from(x: Infallible) -> NotEnoughCapacityError { + fn from(x: Infallible) -> NotEnoughCapacity { match x {} } } impl TryFrom<&str> for InlineString { - type Error = NotEnoughCapacityError; + type Error = NotEnoughCapacity; - fn try_from(string: &str) -> Result { + fn try_from(string: &str) -> Result { let string_len = string.len(); if string_len <= INLINE_STRING_CAPACITY { // SAFETY: // `string_len` is not bigger than capacity. unsafe { Ok(Self::from_str_unchecked(string)) } } else { - Err(NotEnoughCapacityError) + Err(NotEnoughCapacity) } } } @@ -398,14 +393,14 @@ impl InlineString { /// assert_eq!(s, "foobar"); /// ``` #[inline] - pub fn push_str(&mut self, string: &str) -> Result<(), NotEnoughSpaceError> { + pub fn push_str(&mut self, string: &str) -> Result<(), NotEnoughCapacity> { self.assert_sanity(); let string_len = string.len(); let new_length = self.len() + string_len; if new_length > INLINE_STRING_CAPACITY { - return Err(NotEnoughSpaceError); + return Err(NotEnoughCapacity); } unsafe { @@ -436,14 +431,14 @@ impl InlineString { /// assert_eq!(s, "abc123"); /// ``` #[inline] - pub fn push(&mut self, ch: char) -> Result<(), NotEnoughSpaceError> { + pub fn push(&mut self, ch: char) -> Result<(), NotEnoughCapacity> { self.assert_sanity(); let char_len = ch.len_utf8(); let new_length = self.len() + char_len; if new_length > INLINE_STRING_CAPACITY { - return Err(NotEnoughSpaceError); + return Err(NotEnoughCapacity); } { @@ -589,15 +584,16 @@ impl InlineString { /// If `idx` does not lie on a character boundary or is out of bounds, then /// this function will panic. #[inline] - pub fn insert(&mut self, idx: usize, ch: char) -> Result<(), NotEnoughSpaceError> { - self.assert_sanity(); - assert!(idx <= self.len()); + pub fn insert(&mut self, idx: usize, ch: char) -> Result<(), NotEnoughCapacity> { + let mut bits = [0; 4]; + self.insert_str(idx, ch.encode_utf8(&mut bits)) + } let char_len = ch.len_utf8(); let new_length = self.len() + char_len; - if new_length > INLINE_STRING_CAPACITY { - return Err(NotEnoughSpaceError); + if len_sum > INLINE_STRING_CAPACITY { + return Err(NotEnoughCapacity); } unsafe { @@ -698,11 +694,126 @@ impl InlineString { self.length = 0; self.assert_sanity(); } + + /// Splits the string into two at the given index. + /// + /// Returns a new buffer. `self` contains bytes `[0, at)`, and + /// the returned buffer contains bytes `[at, len)`. `at` must be on the + /// boundary of a UTF-8 code point. + /// + /// Note that the capacity of `self` does not change. + /// + /// # Panics + /// + /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last + /// code point of the string. + /// + /// # Examples + /// + /// ``` + /// # fn main() { + /// use std::convert::TryFrom; + /// use inlinable_string::InlineString; + /// + /// let mut hello = InlineString::try_from("Hello, World!").unwrap(); + /// let world = hello.split_off(7); + /// assert_eq!(hello, "Hello, "); + /// assert_eq!(world, "World!"); + /// # } + /// ``` + #[inline] + #[must_use = "use `.truncate()` if you don't need the other half"] + pub fn split_off(&mut self, at: usize) -> Self { + // String index does all bounds checks. + let s: &str = &self[at..]; + + // SAFETY: + // `s` is a part of `InlineString`, thus its length is never bigger + // than `INLINE_STRING_CAPACITY`. + let right_part = unsafe { Self::from_str_unchecked(s) }; + self.length = at as u8; + + right_part + } + + /// Retains only the characters specified by the predicate. + /// + /// In other words, remove all characters `c` such that `f(c)` returns `false`. + /// This method operates in place, visiting each character exactly once in the + /// original order, and preserves the order of the retained characters. + /// + /// # Examples + /// + /// ``` + /// use std::convert::TryFrom; + /// use inlinable_string::InlineString; + /// + /// let mut s = InlineString::try_from("f_o_ob_ar").unwrap(); + /// + /// s.retain(|c| c != '_'); + /// + /// assert_eq!(s, "foobar"); + /// ``` + /// + /// The exact order may be useful for tracking external state, like an index. + /// + /// ``` + /// use std::convert::TryFrom; + /// use inlinable_string::InlineString; + /// + /// let mut s = InlineString::try_from("abcde").unwrap(); + /// let keep = [false, true, true, false, true]; + /// let mut i = 0; + /// s.retain(|_| (keep[i], i += 1).0); + /// assert_eq!(s, "bce"); + /// ``` + #[inline] + pub fn retain(&mut self, mut f: F) + where + F: FnMut(char) -> bool, + { + // Since `InlineString` is a little stack-allocated buffer, + // there's almost no difference whether it's retained in-place + // or not. + + let mut buffer = Self::new(); + let buf = &mut buffer.bytes; + let mut ptr = 0; + let mut copy_bytes = 0; + + let s = &self[..]; + s.char_indices().for_each(|(idx, ch)| { + if f(ch) { + copy_bytes += ch.len_utf8(); + } else if copy_bytes > 0 { + let next_ptr = ptr + copy_bytes; + buf[ptr..next_ptr].copy_from_slice(&s.as_bytes()[idx - copy_bytes..idx]); + + ptr = next_ptr; + copy_bytes = 0; + } + }); + + if copy_bytes > 0 { + // If the whole string is retained, do nothing. + if copy_bytes == s.len() { + return; + } + + let next_ptr = ptr + copy_bytes; + buf[ptr..next_ptr].copy_from_slice(&s.as_bytes()[s.len() - copy_bytes..]); + + ptr = next_ptr; + } + + buffer.length = ptr as u8; + *self = buffer; + } } #[cfg(test)] mod tests { - use super::{InlineString, NotEnoughSpaceError, INLINE_STRING_CAPACITY}; + use super::{InlineString, NotEnoughCapacity, TryFrom, INLINE_STRING_CAPACITY}; #[test] fn test_push_str() { @@ -712,7 +823,7 @@ mod tests { let long_str = "this is a really long string that is much larger than INLINE_STRING_CAPACITY and so cannot be stored inline."; - assert_eq!(s.push_str(long_str), Err(NotEnoughSpaceError)); + assert_eq!(s.push_str(long_str), Err(NotEnoughCapacity)); assert_eq!(s, "small"); } @@ -724,7 +835,7 @@ mod tests { assert!(s.push('a').is_ok()); } - assert_eq!(s.push('a'), Err(NotEnoughSpaceError)); + assert_eq!(s.push('a'), Err(NotEnoughCapacity)); } #[test] @@ -735,7 +846,14 @@ mod tests { assert!(s.insert(0, 'a').is_ok()); } - assert_eq!(s.insert(0, 'a'), Err(NotEnoughSpaceError)); + assert_eq!(s.insert(0, 'a'), Err(NotEnoughCapacity)); + } + + #[test] + #[should_panic] + fn insert_panic() { + let mut s = InlineString::try_from("й").unwrap(); + let _ = s.insert(1, 'q'); } #[test] From 18da0c7910312498d7a14f1258ec92ce8a4bd54e Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 20:37:47 +0200 Subject: [PATCH 11/21] All `insert_str`-related code. --- src/inline_string.rs | 37 +++++++++++++++++++--------- src/lib.rs | 38 +++++++++++++++++++++++++++++ src/string_ext.rs | 58 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 121 insertions(+), 12 deletions(-) diff --git a/src/inline_string.rs b/src/inline_string.rs index e72b348..521a32d 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -589,28 +589,43 @@ impl InlineString { self.insert_str(idx, ch.encode_utf8(&mut bits)) } - let char_len = ch.len_utf8(); - let new_length = self.len() + char_len; + /// Inserts a string into the string buffer at byte position `idx`. + /// + /// # Examples + /// + /// ``` + /// use std::convert::TryFrom; + /// use inlinable_string::InlineString; + /// + /// let mut s = InlineString::try_from("foo").unwrap(); + /// s.insert_str(2, "bar"); + /// assert!(s == "fobaro"); + /// ``` + #[inline] + pub fn insert_str(&mut self, idx: usize, string: &str) -> Result<(), NotEnoughCapacity> { + let len = self.len(); + let amt = string.len(); + let len_sum = len + amt; if len_sum > INLINE_STRING_CAPACITY { return Err(NotEnoughCapacity); } + // SAFETY: + // `idx` is a char boundary and <= `len`, thus it's also `<=` lengths' sum, + // lengths' sum is checked to be `<=` than `INLINE_STRING_CAPACITY`, + // and `string` is a well-formed `str`. unsafe { + assert!(self.is_char_boundary(idx)); ptr::copy( self.bytes.as_ptr().add(idx), - self.bytes.as_mut_ptr().add(idx + char_len), - self.len() - idx, - ); - let mut slice = &mut self.bytes[idx..idx + char_len]; - write!(&mut slice, "{}", ch).expect( - "inlinable_string: internal error: we should have enough space, we - checked above", + self.bytes.as_mut_ptr().add(idx + amt), + len - idx, ); + ptr::copy_nonoverlapping(string.as_ptr(), self.bytes.as_mut_ptr().add(idx), amt); + self.set_len(len_sum); } - self.length = new_length as u8; - self.assert_sanity(); Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 08e235c..d61b116 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -629,6 +629,29 @@ impl StringExt for InlinableString { mem::swap(self, &mut InlinableString::Heap(promoted)); } + #[inline] + fn insert_str(&mut self, idx: usize, string: &str) { + let promoted = match *self { + InlinableString::Heap(ref mut s) => { + s.insert_str(idx, string); + return; + } + InlinableString::Inline(ref mut s) => { + if s.insert_str(idx, string).is_ok() { + return; + } + + let mut promoted = String::with_capacity(s.len() + string.len()); + promoted.push_str(&s[..idx]); + promoted.push_str(string); + promoted.push_str(&s[idx..]); + promoted + } + }; + + mem::swap(self, &mut InlinableString::Heap(promoted)); + } + #[inline] unsafe fn as_mut_slice(&mut self) -> &mut [u8] { match *self { @@ -721,6 +744,21 @@ mod tests { ); } + #[test] + fn test_insert_str() { + let mut s = InlinableString::new(); + + for _ in 0..(INLINE_STRING_CAPACITY / 3) { + s.insert_str(0, "foo"); + } + s.insert_str(0, "foo"); + + assert_eq!( + s, + String::from_iter((0..(INLINE_STRING_CAPACITY / 3) + 1).map(|_| "foo")) + ); + } + // Next, some general sanity tests. #[test] diff --git a/src/string_ext.rs b/src/string_ext.rs index 5d40675..446636f 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -404,7 +404,44 @@ where /// /// If `idx` does not lie on a character boundary or is out of bounds, then /// this function will panic. - fn insert(&mut self, idx: usize, ch: char); + #[inline] + fn insert(&mut self, idx: usize, ch: char) { + let mut bits = [0; 4]; + self.insert_str(idx, ch.encode_utf8(&mut bits)); + } + + /// Inserts a string into the string buffer at byte position `idx`. + /// + /// # Warning + /// + /// This is an O(n) operation as it requires copying every element in the + /// buffer. + /// + /// # Examples + /// + /// ``` + /// use inlinable_string::{InlinableString, StringExt}; + /// + /// let mut s = InlinableString::from("foo"); + /// s.insert_str(2, "bar"); + /// assert!(s == "fobaro"); + /// ``` + /// + /// # Panics + /// + /// If `idx` does not lie on a character boundary or is out of bounds, then + /// this function will panic. + fn insert_str(&mut self, idx: usize, string: &str); + /* It looks like `insert_str` is better manually implemented, + * while provided `insert` is mostly okay. + { + let mut idx = idx; + string.chars().for_each(|ch| { + self.insert(idx, ch); + idx += ch.len_utf8(); + }); + } + */ /// Views the string buffer as a mutable sequence of bytes. /// @@ -628,6 +665,11 @@ impl StringExt for String { String::insert(self, idx, ch) } + #[inline] + fn insert_str(&mut self, idx: usize, string: &str) { + String::insert_str(self, idx, string) + } + #[inline] unsafe fn as_mut_slice(&mut self) -> &mut [u8] { &mut *(self.as_mut_str() as *mut str as *mut [u8]) @@ -850,6 +892,13 @@ mod provided_methods_tests { assert_eq!(output, "Hello \u{FFFD}World"); } + #[test] + fn test_insert_str() { + let mut s = ReqImpl::from("foo"); + s.insert_str(2, "bar"); + assert!(s == "fobaro"); + } + #[test] fn test_into_bytes() { let s = ReqImpl::from("hello"); @@ -990,4 +1039,11 @@ mod std_string_stringext_sanity_tests { assert_eq!(StringExt::pop(&mut s), Some('f')); assert_eq!(StringExt::pop(&mut s), None); } + + #[test] + fn test_insert_str() { + let mut s = String::from("foo"); + StringExt::insert_str(&mut s, 1, "bar"); + assert_eq!(s, "fbaroo"); + } } From 285435d3610623df3bf50b775805a7bac8e00f81 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 20:42:09 +0200 Subject: [PATCH 12/21] All `remove_range`-related code. --- src/inline_string.rs | 57 ++++++++++++++++++++++++++- src/lib.rs | 11 ++++++ src/string_ext.rs | 92 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+), 1 deletion(-) diff --git a/src/inline_string.rs b/src/inline_string.rs index 521a32d..35c7ec5 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -563,7 +563,62 @@ impl InlineString { ch } - } + + /// Removes the specified range from the string buffer. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::convert::TryFrom; + /// use inlinable_string::InlineString; + /// + /// let mut s = InlineString::try_from("α is not β!").unwrap(); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Remove the range up until the β from the string + /// s.remove_range(..beta_offset); + /// + /// assert_eq!(s, "β!"); + /// + /// // A full range clears the string + /// s.remove_range(..); + /// assert_eq!(s, ""); + /// ``` + #[inline] + pub fn remove_range(&mut self, range: R) + where + R: RangeBounds, + { + use ops::Bound::*; + + let len = self.len(); + let start = match range.start_bound() { + Included(&n) => n, + Excluded(&n) => n + 1, + Unbounded => 0, + }; + let end = match range.end_bound() { + Included(&n) => n + 1, + Excluded(&n) => n, + Unbounded => len, + }; + + // Checking bounds. + let s: &str = &self; + assert!(s.is_char_boundary(end) && start <= end && s.is_char_boundary(start)); + + // Start and end are checked, remove everything inside that range. + self.bytes.copy_within(end.., start); + self.length -= (end - start) as u8; } /// Inserts a character into the string buffer at byte position `idx`. diff --git a/src/lib.rs b/src/lib.rs index d61b116..c62b537 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -606,6 +606,17 @@ impl StringExt for InlinableString { } } + #[inline] + fn remove_range(&mut self, range: R) + where + R: RangeBounds, + { + match self { + InlinableString::Heap(s) => s.remove_range(range), + InlinableString::Inline(s) => s.remove_range(range), + } + } + #[inline] fn insert(&mut self, idx: usize, ch: char) { let promoted = match *self { diff --git a/src/string_ext.rs b/src/string_ext.rs index 446636f..c1d1722 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -383,6 +383,68 @@ where /// ``` fn remove(&mut self, idx: usize) -> char; + /// Removes the specified range from the string buffer. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use inlinable_string::{InlinableString, StringExt}; + /// + /// let mut s = InlinableString::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Remove the range up until the β from the string + /// s.remove_range(..beta_offset); + /// + /// assert_eq!(s, "β is beta"); + /// + /// // A full range clears the string + /// s.remove_range(..); + /// assert_eq!(s, ""); + /// ``` + #[inline] + fn remove_range(&mut self, range: R) + where + R: RangeBounds, + { + use ops::Bound::*; + + let len = self.len(); + let start = match range.start_bound() { + Included(&n) => n, + Excluded(&n) => n + 1, + Unbounded => 0, + }; + let end = match range.end_bound() { + Included(&n) => n + 1, + Excluded(&n) => n, + Unbounded => len, + }; + + // Checking bounds. + assert!(start <= end); + + let diff = end - start; + + let mut sum = 0; + while sum < diff { + sum += self.remove(start).len_utf8(); + } + + // Sanity check: number of deleted bytes must be equal + // to the range length. + assert_eq!(diff, sum); + } + /// Inserts a character into the string buffer at byte position `idx`. /// /// # Warning @@ -660,6 +722,14 @@ impl StringExt for String { String::remove(self, idx) } + #[inline] + fn remove_range(&mut self, range: R) + where + R: RangeBounds, + { + String::drain(self, range); + } + #[inline] fn insert(&mut self, idx: usize, ch: char) { String::insert(self, idx, ch) @@ -935,6 +1005,21 @@ mod provided_methods_tests { s.push_str("bar"); assert_eq!(s, "foobar"); } + + #[test] + fn test_remove_range() { + let mut s = ReqImpl::from("α is alpha, β is beta"); + let beta_offset = s.find('β').unwrap_or(s.len()); + + // Remove the range up until the β from the string + s.remove_range(..beta_offset); + + assert_eq!(s, "β is beta"); + + // A full range clears the string + s.remove_range(..); + assert_eq!(s, ""); + } } #[cfg(test)] @@ -1046,4 +1131,11 @@ mod std_string_stringext_sanity_tests { StringExt::insert_str(&mut s, 1, "bar"); assert_eq!(s, "fbaroo"); } + + #[test] + fn test_remove_range() { + let mut s = String::from("foobar"); + StringExt::remove_range(&mut s, 1..3); + assert_eq!(s, "fbar"); + } } From c26457bd2e7b818b6d4e4775a35cacd07a642e16 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 20:46:23 +0200 Subject: [PATCH 13/21] A little change in docs, no trait objects. --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c62b537..1da3e50 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,7 +37,7 @@ //! //! // This method can work on strings potentially stored inline on the stack, //! // on the heap, or plain old `std::string::String`s! -//! fn takes_a_string_reference(string: &mut StringExt) { +//! fn takes_a_string_reference(string: &mut impl StringExt) { //! // Do something with the string... //! string.push_str("it works!"); //! } From bea152b2a35d2965734c465e59f5200054d1a346 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 20:50:40 +0200 Subject: [PATCH 14/21] All `split_off`-related code. --- src/lib.rs | 33 +++++++++++++++++++++++++++++++++ src/string_ext.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 1da3e50..3dd5589 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -678,6 +678,21 @@ impl StringExt for InlinableString { InlinableString::Inline(ref s) => s.len(), } } + + #[inline] + #[must_use = "use `.truncate()` if you don't need the other half"] + fn split_off(&mut self, at: usize) -> Self { + match self { + InlinableString::Inline(s) => Self::Inline(s.split_off(at)), + InlinableString::Heap(s) => match InlineString::try_from(&s[at..]) { + Ok(inlined) => { + s.truncate(at); + Self::Inline(inlined) + } + Err(_) => Self::Heap(s.split_off(at)), + }, + } + } } #[cfg(test)] @@ -772,6 +787,24 @@ mod tests { // Next, some general sanity tests. + #[test] + fn test_split_off() { + // This test checks `Heap -> (Heap, Inline)` case of the function; + // `Heap -> (Heap, Heap)` is tested by `String` itself, + // `Inline -> (Inline, Inline)` is tested by `InlineString`. + + let mut inlinable: InlinableString = LONG_STR.into(); + let len = LONG_STR.len(); + assert!(len > INLINE_STRING_CAPACITY as usize); + + let at = len - 7; + let right_part = inlinable.split_off(at); + assert_eq!(&LONG_STR[..at], inlinable); + assert_eq!(&LONG_STR[at..], right_part); + assert!(matches!(inlinable, InlinableString::Heap(_))); + assert!(matches!(right_part, InlinableString::Inline(_))); + } + #[test] fn test_new() { let s = ::new(); diff --git a/src/string_ext.rs b/src/string_ext.rs index c1d1722..726164d 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -623,6 +623,34 @@ where self.borrow_mut() } + /// Splits the string into two at the given index. + /// + /// Returns a new buffer. `self` contains bytes `[0, at)`, and + /// the returned buffer contains bytes `[at, len)`. `at` must be on the + /// boundary of a UTF-8 code point. + /// + /// Note that the capacity of `self` does not change. + /// + /// # Panics + /// + /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last + /// code point of the string. + /// + /// # Examples + /// + /// ``` + /// # fn main() { + /// use inlinable_string::{InlinableString, StringExt}; + /// + /// let mut hello = InlinableString::from("Hello, World!"); + /// let world = hello.split_off(7); + /// assert_eq!(hello, "Hello, "); + /// assert_eq!(world, "World!"); + /// # } + /// ``` + #[must_use = "use `.truncate()` if you don't need the other half"] + fn split_off(&mut self, at: usize) -> Self; + /// Internal function to decrease the numbers of unsafe. #[inline] fn from_string(s: String) -> S { @@ -749,6 +777,11 @@ impl StringExt for String { fn len(&self) -> usize { String::len(self) } + + #[inline] + fn split_off(&mut self, at: usize) -> Self { + ::split_off(self, at) + } } #[cfg(test)] @@ -857,6 +890,9 @@ mod provided_methods_tests { fn len(&self) -> usize { self.0.len() } + fn split_off(&mut self, at: usize) -> Self { + Self(self.0.split_off(at)) + } } #[test] @@ -1138,4 +1174,12 @@ mod std_string_stringext_sanity_tests { StringExt::remove_range(&mut s, 1..3); assert_eq!(s, "fbar"); } + + #[test] + fn test_split_off() { + let mut s = String::from("foobar"); + let right_part = StringExt::split_off(&mut s, 3); + assert_eq!(s, "foo"); + assert_eq!(right_part, "bar"); + } } From 1097973692dff5c4a3928449a1df9fcdab8e1c9a Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 20:51:55 +0200 Subject: [PATCH 15/21] All `into_boxed_str`-related code --- src/string_ext.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/string_ext.rs b/src/string_ext.rs index 726164d..eb31e7d 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -623,6 +623,33 @@ where self.borrow_mut() } + /// Converts this `String` into a [`Box`]`<`[`str`]`>`. + /// + /// This will drop any excess capacity. + /// + /// [`Box`]: https://doc.rust-lang.org/std/boxed/struct.Box.html + /// [`str`]: https://doc.rust-lang.org/std/primitive.str.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use inlinable_string::{InlinableString, StringExt}; + /// + /// let s = InlinableString::from("hello"); + /// + /// let b = s.into_boxed_str(); + /// ``` + #[inline] + fn into_boxed_str(self) -> Box + where + Self: Into, + { + let s = self.into(); + ::into_boxed_str(s) + } + /// Splits the string into two at the given index. /// /// Returns a new buffer. `self` contains bytes `[0, at)`, and From 66761f96ca14aa0effa21f0275b38e881fa919c7 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 20:58:46 +0200 Subject: [PATCH 16/21] All `retain`-related code --- src/lib.rs | 11 ++++++ src/string_ext.rs | 87 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 3dd5589..8bc511a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -693,6 +693,17 @@ impl StringExt for InlinableString { }, } } + + #[inline] + fn retain(&mut self, f: F) + where + F: FnMut(char) -> bool, + { + match self { + Self::Inline(s) => s.retain(f), + Self::Heap(s) => s.retain(f), + } + } } #[cfg(test)] diff --git a/src/string_ext.rs b/src/string_ext.rs index eb31e7d..619eb2a 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -678,6 +678,63 @@ where #[must_use = "use `.truncate()` if you don't need the other half"] fn split_off(&mut self, at: usize) -> Self; + /// Retains only the characters specified by the predicate. + /// + /// In other words, remove all characters `c` such that `f(c)` returns `false`. + /// This method operates in place, visiting each character exactly once in the + /// original order, and preserves the order of the retained characters. + /// + /// # Examples + /// + /// ``` + /// use inlinable_string::{InlinableString, StringExt}; + /// + /// let mut s = InlinableString::from("f_o_ob_ar"); + /// + /// s.retain(|c| c != '_'); + /// + /// assert_eq!(s, "foobar"); + /// ``` + /// + /// The exact order may be useful for tracking external state, like an index. + /// + /// ``` + /// use inlinable_string::{InlinableString, StringExt}; + /// + /// let mut s = InlinableString::from("abcde"); + /// let keep = [false, true, true, false, true]; + /// let mut i = 0; + /// s.retain(|_| (keep[i], i += 1).0); + /// assert_eq!(s, "bce"); + /// ``` + #[inline] + fn retain(&mut self, mut f: F) + where + F: FnMut(char) -> bool, + { + /// Insanely ineffective implementation, + /// yet it is done "in-place" if you don't count + /// a lot of stack space. + #[inline] + fn recursive_retain(self_: &mut SE, f: &mut F) + where + F: FnMut(char) -> bool, + SE: StringExt, + { + match self_.pop() { + Some(ch) => { + recursive_retain(self_, f); + if f(ch) { + self_.push(ch); + } + } + None => (), + } + } + + recursive_retain(self, &mut f); + } + /// Internal function to decrease the numbers of unsafe. #[inline] fn from_string(s: String) -> S { @@ -809,6 +866,14 @@ impl StringExt for String { fn split_off(&mut self, at: usize) -> Self { ::split_off(self, at) } + + #[inline] + fn retain(&mut self, f: F) + where + F: FnMut(char) -> bool, + { + ::retain(self, f) + } } #[cfg(test)] @@ -1083,6 +1148,21 @@ mod provided_methods_tests { s.remove_range(..); assert_eq!(s, ""); } + + #[test] + fn test_retain() { + let mut s = ReqImpl::from("f_o_ob_ar"); + + s.retain(|c| c != '_'); + + assert_eq!(s, "foobar"); + + let mut s = ReqImpl::from("abcde"); + let keep = [false, true, true, false, true]; + let mut i = 0; + s.retain(|_| (keep[i], i += 1).0); + assert_eq!(s, "bce"); + } } #[cfg(test)] @@ -1209,4 +1289,11 @@ mod std_string_stringext_sanity_tests { assert_eq!(s, "foo"); assert_eq!(right_part, "bar"); } + + #[test] + fn test_retain() { + let mut s = String::from("--f-oo-b-a-r---"); + StringExt::retain(&mut s, |ch| ch != '-'); + assert_eq!(s, "foobar"); + } } From ba73cc0d74717a46069b18cd1071cfec0cf0be3e Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 21:01:28 +0200 Subject: [PATCH 17/21] All `replace_range`-related code --- src/lib.rs | 64 ++++++++++++++++++++++++++++++++++++++++++++ src/string_ext.rs | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 8bc511a..a72b575 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -704,6 +704,61 @@ impl StringExt for InlinableString { Self::Heap(s) => s.retain(f), } } + + #[inline] + fn replace_range(&mut self, range: R, replace_with: &str) + where + R: RangeBounds, + { + let promoted = match self { + Self::Heap(s) => { + s.replace_range(range, replace_with); + return; + } + Self::Inline(s) => { + use ops::Bound::*; + + let len = s.len(); + let start = match range.start_bound() { + Included(&n) => n, + Excluded(&n) => n + 1, + Unbounded => 0, + }; + let end = match range.end_bound() { + Included(&n) => n + 1, + Excluded(&n) => n, + Unbounded => len, + }; + + // String index does all bounds checks. + let range_len = s[start..end].len(); + + let new_len = len - range_len + replace_with.len(); + if INLINE_STRING_CAPACITY >= new_len { + let mut ss = InlineString::new(); + + // SAFETY: + // Inline capacity is checked to be no less than new length, + // and all three parts are checked to be valid `str`. + unsafe { + let buf = ss.as_bytes_mut(); + // Copy the [end..len] to its new place, then copy `replace_with`. + let replace_end = start + replace_with.len(); + buf.copy_within(end..len, replace_end); + buf[start..replace_end].copy_from_slice(replace_with.as_bytes()); + + ss.set_len(new_len); + } + + Self::Inline(ss) + } else { + Self::Heap([&s[..start], replace_with, &s[end..]].concat()) + } + } + }; + + *self = promoted; + } } #[cfg(test)] @@ -796,6 +851,15 @@ mod tests { ); } + #[test] + fn test_replace_range() { + let mut s = InlinableString::from("smol str"); + assert!(matches!(&s, InlinableString::Inline(_))); + + s.replace_range(1..7, LONG_STR); + assert_eq!(s, ["s", LONG_STR, "r"].concat()); + } + // Next, some general sanity tests. #[test] diff --git a/src/string_ext.rs b/src/string_ext.rs index 619eb2a..29a23e3 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -735,6 +735,49 @@ where recursive_retain(self, &mut f); } + /// Removes the specified range in the string, + /// and replaces it with the given string. + /// The given string doesn't need to be the same length as the range. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// [`char`]: https://doc.rust-lang.org/std/primitive.char.html + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use inlinable_string::{InlinableString, StringExt}; + /// + /// let mut s = InlinableString::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Replace the range up until the β from the string + /// s.replace_range(..beta_offset, "Α is capital alpha; "); + /// assert_eq!(s, "Α is capital alpha; β is beta"); + /// ``` + #[inline] + fn replace_range(&mut self, range: R, replace_with: &str) + where + R: RangeBounds, + { + use ops::Bound::*; + + let start = match range.start_bound() { + Included(&n) => n, + Excluded(&n) => n + 1, + Unbounded => 0, + }; + + self.remove_range(range); + self.insert_str(start, replace_with); + } +} + /// Internal function to decrease the numbers of unsafe. #[inline] fn from_string(s: String) -> S { @@ -874,6 +917,14 @@ impl StringExt for String { { ::retain(self, f) } + + #[inline] + fn replace_range(&mut self, range: R, replace_with: &str) + where + R: RangeBounds, + { + ::replace_range(self, range, replace_with) + } } #[cfg(test)] @@ -1149,6 +1200,16 @@ mod provided_methods_tests { assert_eq!(s, ""); } + #[test] + fn test_replace_range() { + let mut s = ReqImpl::from("α is alpha, β is beta"); + let beta_offset = s.find('β').unwrap_or(s.len()); + + // Replace the range up until the β from the string + s.replace_range(..beta_offset, "Α is capital alpha; "); + assert_eq!(s, "Α is capital alpha; β is beta"); + } + #[test] fn test_retain() { let mut s = ReqImpl::from("f_o_ob_ar"); @@ -1296,4 +1357,11 @@ mod std_string_stringext_sanity_tests { StringExt::retain(&mut s, |ch| ch != '-'); assert_eq!(s, "foobar"); } + + #[test] + fn test_replace_range() { + let mut s = String::from("foobar"); + StringExt::replace_range(&mut s, 1..5, "qwerty"); + assert_eq!(s, "fqwertyr"); + } } From 23b8fa13ea8f56e9ec80d5eae609b5caaeb0b0de Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Mon, 10 Aug 2020 21:02:27 +0200 Subject: [PATCH 18/21] Impls of `From` trait for `String`; 1.41+ Rust only --- src/string_ext.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/string_ext.rs b/src/string_ext.rs index 29a23e3..f2dc64e 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -927,6 +927,29 @@ impl StringExt for String { } } +/// Implementation of some traits from stdlib for `String` type. +/// This is 1.41.0+ code; before 1.41 orphan rules were too strict. +mod string_impls { + use crate::{InlinableString, InlineString}; + + impl From for String { + #[inline] + fn from(s: InlineString) -> String { + String::from(&*s) + } + } + + impl From for String { + #[inline] + fn from(s: InlinableString) -> String { + match s { + InlinableString::Heap(s) => s, + InlinableString::Inline(s) => String::from(s), + } + } + } +} + #[cfg(test)] mod provided_methods_tests { From 4fc8729c70a618bbd1334dd5de6298d78a937af6 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Tue, 11 Aug 2020 10:26:34 +0200 Subject: [PATCH 19/21] Bug fix. The new `InlineString::remove` implementation was bugged for all strings, and also UB-producing for non-ASCII strings. --- src/inline_string.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/inline_string.rs b/src/inline_string.rs index 35c7ec5..e6758d8 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -549,17 +549,25 @@ impl InlineString { /// assert_eq!(s.remove(0), 'f'); /// assert_eq!(s.remove(1), 'o'); /// assert_eq!(s.remove(0), 'o'); + /// assert_eq!(s, ""); /// ``` #[inline] pub fn remove(&mut self, idx: usize) -> char { - self.assert_sanity(); - let ch = match self[idx..].chars().next() { Some(ch) => ch, None => panic!("cannot remove a char from the end of a string"), }; - self.bytes.copy_within(idx + ch.len_utf8().., idx); + let ch_len = ch.len_utf8(); + let len = self.len(); + // SAFETY: + // `idx` was checked through string indexing; + // `ch` was produced by `chars` iterator, + // so `(idx + ch_len)..len` range is valid; + unsafe { + self.bytes.copy_within(idx + ch_len..len, idx); + self.set_len(len - ch_len); + } ch } From 73132ebc6e27a979ba0f374147c881abf7e971cb Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Tue, 11 Aug 2020 10:59:21 +0200 Subject: [PATCH 20/21] Removed unsound `AsMut<[u8]>` impl There are reasons for `as_bytes_mut` function being unsafe for strings, so this impl does not make sense, as it allows getting mutable slice without `unsafe` keyword. --- src/inline_string.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/inline_string.rs b/src/inline_string.rs index e6758d8..81c3078 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -85,15 +85,6 @@ impl AsMut for InlineString { } } -impl AsMut<[u8]> for InlineString { - #[inline] - fn as_mut(&mut self) -> &mut [u8] { - self.assert_sanity(); - let length = self.len(); - &mut self.bytes[0..length] - } -} - /// An error type for `InlineString`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct NotEnoughCapacity; From 0cb7c5bbd6e3dd824aa5235580dcd96e954d5a55 Mon Sep 17 00:00:00 2001 From: ImmConCon Date: Tue, 11 Aug 2020 12:11:06 +0200 Subject: [PATCH 21/21] Test compilation fix The test struct for provided methods was implementing the wrong required method. --- src/inline_string.rs | 2 +- src/string_ext.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/inline_string.rs b/src/inline_string.rs index 81c3078..c8aa6b6 100644 --- a/src/inline_string.rs +++ b/src/inline_string.rs @@ -921,7 +921,7 @@ mod tests { #[test] #[should_panic] fn insert_panic() { - let mut s = InlineString::try_from("й").unwrap(); + let mut s = InlineString::try_from("щ").unwrap(); let _ = s.insert(1, 'q'); } diff --git a/src/string_ext.rs b/src/string_ext.rs index f2dc64e..90ac678 100644 --- a/src/string_ext.rs +++ b/src/string_ext.rs @@ -1050,8 +1050,8 @@ mod provided_methods_tests { fn remove(&mut self, idx: usize) -> char { self.0.remove(idx) } - fn insert(&mut self, idx: usize, ch: char) { - self.0.insert(idx, ch) + fn insert_str(&mut self, idx: usize, string: &str) { + self.0.insert_str(idx, string) } fn len(&self) -> usize { self.0.len()