diff --git a/src/common/raw.rs b/src/common/raw.rs index 070a62c..97d0353 100644 --- a/src/common/raw.rs +++ b/src/common/raw.rs @@ -1,11 +1,18 @@ use std::fmt; use std::fmt::Formatter; +use super::Result; + #[inline(always)] pub(crate) const fn is_continuation(_: u8) -> bool { false } +#[inline(always)] +pub(crate) fn validate_bytes(_: &[u8]) -> Result<()> { + Ok(()) +} + #[inline(always)] pub(crate) fn decode_code_point(_: &[u8]) -> u32 { unreachable!(); diff --git a/src/lib.rs b/src/lib.rs index 203772f..32974ed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -187,12 +187,10 @@ macro_rules! if_raw_str { }; } -if_raw_str! { - macro_rules! expect_encoded { - ( $result:expr ) => { - $result.expect("invalid raw bytes") - }; - } +macro_rules! expect_encoded { + ( $result:expr ) => { + $result.expect("invalid raw bytes") + }; } #[cfg_attr( @@ -224,6 +222,7 @@ if_raw_str! { mod raw_str; pub use raw_str::RawOsStr; + pub use raw_str::RawOsStrCow; pub use raw_str::RawOsString; } @@ -266,6 +265,43 @@ type Result = result::Result; pub trait OsStrBytes: private::Sealed + ToOwned { /// Converts a byte string into an equivalent platform-native string. /// + /// # Panics + /// + /// Panics if the string is not valid for the [unspecified encoding] used + /// by this crate. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// use std::ffi::OsStr; + /// # use std::io; + /// + /// use os_str_bytes::OsStrBytes; + /// + /// let os_string = env::current_exe()?; + /// let os_bytes = os_string.to_raw_bytes(); + /// assert_eq!(os_string, OsStr::assert_from_raw_bytes(os_bytes)); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + #[inline] + #[must_use = "method should not be used for validation"] + #[track_caller] + fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> + where + S: Into>, + { + expect_encoded!(Self::from_raw_bytes(string)) + } + + /// Converts a byte string into an equivalent platform-native string. + /// + /// [`assert_from_raw_bytes`] should almost always be used instead. For + /// more information, see [`EncodingError`]. + /// /// # Errors /// /// See documentation for [`EncodingError`]. @@ -286,6 +322,7 @@ pub trait OsStrBytes: private::Sealed + ToOwned { /// # Ok::<_, io::Error>(()) /// ``` /// + /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes fn from_raw_bytes<'a, S>(string: S) -> Result> where S: Into>; @@ -360,6 +397,40 @@ impl OsStrBytes for Path { pub trait OsStringBytes: private::Sealed + Sized { /// Converts a byte string into an equivalent platform-native string. /// + /// # Panics + /// + /// Panics if the string is not valid for the [unspecified encoding] used + /// by this crate. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// use std::ffi::OsString; + /// # use std::io; + /// + /// use os_str_bytes::OsStringBytes; + /// + /// let os_string = env::current_exe()?; + /// let os_bytes = os_string.clone().into_raw_vec(); + /// assert_eq!(os_string, OsString::assert_from_raw_vec(os_bytes)); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: self#encoding + #[inline] + #[must_use = "method should not be used for validation"] + #[track_caller] + fn assert_from_raw_vec(string: Vec) -> Self { + expect_encoded!(Self::from_raw_vec(string)) + } + + /// Converts a byte string into an equivalent platform-native string. + /// + /// [`assert_from_raw_vec`] should almost always be used instead. For more + /// information, see [`EncodingError`]. + /// /// # Errors /// /// See documentation for [`EncodingError`]. @@ -380,6 +451,7 @@ pub trait OsStringBytes: private::Sealed + Sized { /// # Ok::<_, io::Error>(()) /// ``` /// + /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec fn from_raw_vec(string: Vec) -> Result; /// Converts a platform-native string into an equivalent byte string. @@ -433,7 +505,14 @@ mod private { use std::path::Path; use std::path::PathBuf; + if_raw_str! { + use std::borrow::Cow; + + use super::RawOsStr; + } + pub trait Sealed {} + impl Sealed for char {} impl Sealed for OsStr {} impl Sealed for OsString {} @@ -441,4 +520,8 @@ mod private { impl Sealed for PathBuf {} impl Sealed for &str {} impl Sealed for &String {} + + if_raw_str! { + impl Sealed for Cow<'_, RawOsStr> {} + } } diff --git a/src/raw_str.rs b/src/raw_str.rs index 4594614..07afdab 100644 --- a/src/raw_str.rs +++ b/src/raw_str.rs @@ -27,6 +27,7 @@ use super::imp; use super::imp::raw; use super::iter::Split; use super::pattern::Encoded as EncodedPattern; +use super::private; use super::Pattern; #[cfg(not(feature = "memchr"))] @@ -106,7 +107,7 @@ unsafe impl TransmuteBox for [u8] {} pub struct RawOsStr([u8]); impl RawOsStr { - fn from_raw_bytes_unchecked(string: &[u8]) -> &Self { + const fn from_inner(string: &[u8]) -> &Self { // SAFETY: This struct has a layout that makes this operation safe. unsafe { mem::transmute(string) } } @@ -136,9 +137,7 @@ impl RawOsStr { #[must_use] pub fn new(string: &OsStr) -> Cow<'_, Self> { match imp::os_str_to_bytes(string) { - Cow::Borrowed(string) => { - Cow::Borrowed(Self::from_raw_bytes_unchecked(string)) - } + Cow::Borrowed(string) => Cow::Borrowed(Self::from_inner(string)), Cow::Owned(string) => Cow::Owned(RawOsString(string)), } } @@ -163,7 +162,76 @@ impl RawOsStr { #[inline] #[must_use] pub fn from_str(string: &str) -> &Self { - Self::from_raw_bytes_unchecked(string.as_bytes()) + Self::from_inner(string.as_bytes()) + } + + /// Wraps a byte string, without copying or encoding conversion. + /// + /// # Panics + /// + /// Panics if the string is not valid for the [unspecified encoding] used + /// by this crate. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsStr; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsStr::new(&os_string); + /// let raw_bytes = raw.as_raw_bytes(); + /// assert_eq!(&*raw, RawOsStr::assert_from_raw_bytes(raw_bytes)); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: super#encoding + #[inline] + #[must_use = "method should not be used for validation"] + #[track_caller] + pub fn assert_from_raw_bytes(string: &[u8]) -> &Self { + expect_encoded!(raw::validate_bytes(string)); + + Self::from_inner(string) + } + + /// Wraps a byte string, without copying or encoding conversion. + /// + /// # Safety + /// + /// The string must be valid for the [unspecified encoding] used by this + /// crate. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsStr; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsStr::new(&os_string); + /// let raw_bytes = raw.as_raw_bytes(); + /// assert_eq!(&*raw, unsafe { + /// RawOsStr::from_raw_bytes_unchecked(raw_bytes) + /// }); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: super#encoding + #[inline] + #[must_use] + #[track_caller] + pub unsafe fn from_raw_bytes_unchecked(string: &[u8]) -> &Self { + #[cfg(debug_assertions)] + expect_encoded!(raw::validate_bytes(string)); + + Self::from_inner(string) } /// Returns the byte string stored by this container. @@ -349,10 +417,7 @@ impl RawOsStr { let index = find_fn(&self.0, pat)?; let prefix = &self.0[..index]; let suffix = &self.0[index + pat.len()..]; - Some(( - Self::from_raw_bytes_unchecked(prefix), - Self::from_raw_bytes_unchecked(suffix), - )) + Some((Self::from_inner(prefix), Self::from_inner(suffix))) } pub(super) fn rsplit_once_raw

(&self, pat: &P) -> Option<(&Self, &Self)> @@ -467,10 +532,7 @@ impl RawOsStr { self.check_bound(mid); let (prefix, suffix) = self.0.split_at(mid); - ( - Self::from_raw_bytes_unchecked(prefix), - Self::from_raw_bytes_unchecked(suffix), - ) + (Self::from_inner(prefix), Self::from_inner(suffix)) } pub(super) fn split_once_raw

(&self, pat: &P) -> Option<(&Self, &Self)> @@ -567,7 +629,7 @@ impl RawOsStr { let pat = pat.__encode(); let pat = pat.__get(); - self.0.strip_prefix(pat).map(Self::from_raw_bytes_unchecked) + self.0.strip_prefix(pat).map(Self::from_inner) } /// Equivalent to [`str::strip_suffix`]. @@ -593,11 +655,14 @@ impl RawOsStr { let pat = pat.__encode(); let pat = pat.__get(); - self.0.strip_suffix(pat).map(Self::from_raw_bytes_unchecked) + self.0.strip_suffix(pat).map(Self::from_inner) } /// Converts this representation back to a platform-native string. /// + /// When possible, use [`RawOsStrCow::into_os_str`] for a more efficient + /// conversion on some platforms. + /// /// # Examples /// /// ``` @@ -678,7 +743,14 @@ impl RawOsStr { while let Some(substring) = strip_fn(string, pat) { string = substring; } - Self::from_raw_bytes_unchecked(string) + Self::from_inner(string) + } + + fn trim_end_matches_raw

(&self, pat: &P) -> &Self + where + P: EncodedPattern, + { + self.trim_matches_raw_with(pat, <[_]>::strip_suffix) } /// Equivalent to [`str::trim_end_matches`]. @@ -698,7 +770,35 @@ impl RawOsStr { where P: Pattern, { - self.trim_matches_raw_with(&pat.__encode(), <[_]>::strip_suffix) + self.trim_end_matches_raw(&pat.__encode()) + } + + /// Equivalent to [`str::trim_matches`]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("111foo1bar111"); + /// assert_eq!("foo1bar", raw.trim_matches("1")); + /// assert_eq!("111foo1bar111", raw.trim_matches("o")); + /// ``` + #[inline] + #[must_use] + pub fn trim_matches

(&self, pat: P) -> &Self + where + P: Pattern, + { + let pat = pat.__encode(); + self.trim_start_matches_raw(&pat).trim_end_matches_raw(&pat) + } + + fn trim_start_matches_raw

(&self, pat: &P) -> &Self + where + P: EncodedPattern, + { + self.trim_matches_raw_with(pat, <[_]>::strip_prefix) } /// Equivalent to [`str::trim_start_matches`]. @@ -718,7 +818,7 @@ impl RawOsStr { where P: Pattern, { - self.trim_matches_raw_with(&pat.__encode(), <[_]>::strip_prefix) + self.trim_start_matches_raw(&pat.__encode()) } } @@ -773,6 +873,71 @@ impl ToOwned for RawOsStr { } } +/// Extensions to [`Cow`] for additional conversions. +/// +/// [`Cow`]: Cow +#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))] +pub trait RawOsStrCow<'a>: private::Sealed { + /// Converts this representation back to a platform-native string. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsStr; + /// use os_str_bytes::RawOsStrCow; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsStr::new(&os_string); + /// assert_eq!(os_string, raw.into_os_str()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[must_use] + fn into_os_str(self) -> Cow<'a, OsStr>; + + /// Returns the byte string stored by this container. + /// + /// The returned string will use an [unspecified encoding]. + /// + /// # Examples + /// + /// ``` + /// use std::borrow::Cow; + /// + /// use os_str_bytes::RawOsStr; + /// use os_str_bytes::RawOsStrCow; + /// + /// let string = "foobar"; + /// let raw = Cow::Borrowed(RawOsStr::from_str(string)); + /// assert_eq!(string.as_bytes(), &*raw.into_raw_bytes()); + /// ``` + /// + /// [unspecified encoding]: super#encoding + #[must_use] + fn into_raw_bytes(self) -> Cow<'a, [u8]>; +} + +impl<'a> RawOsStrCow<'a> for Cow<'a, RawOsStr> { + #[inline] + fn into_os_str(self) -> Cow<'a, OsStr> { + match self { + Cow::Borrowed(string) => string.to_os_str(), + Cow::Owned(string) => Cow::Owned(string.into_os_string()), + } + } + + #[inline] + fn into_raw_bytes(self) -> Cow<'a, [u8]> { + match self { + Cow::Borrowed(string) => Cow::Borrowed(&string.0), + Cow::Owned(string) => Cow::Owned(string.0), + } + } +} + /// A container for owned byte strings converted by this crate. /// /// For more information, see [`RawOsStr`]. @@ -827,6 +992,97 @@ impl RawOsString { Self(string.into_bytes()) } + /// Wraps a byte string, without copying or encoding conversion. + /// + /// # Panics + /// + /// Panics if the string is not valid for the [unspecified encoding] used + /// by this crate. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsString; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsString::new(os_string); + /// let raw_bytes = raw.clone().into_raw_vec(); + /// assert_eq!(raw, RawOsString::assert_from_raw_vec(raw_bytes)); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: super#encoding + #[inline] + #[must_use = "method should not be used for validation"] + #[track_caller] + pub fn assert_from_raw_vec(string: Vec) -> Self { + expect_encoded!(raw::validate_bytes(&string)); + + Self(string) + } + + /// Wraps a byte string, without copying or encoding conversion. + /// + /// # Safety + /// + /// The string must be valid for the [unspecified encoding] used by this + /// crate. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsString; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsString::new(os_string); + /// let raw_bytes = raw.clone().into_raw_vec(); + /// assert_eq!(raw, unsafe { + /// RawOsString::from_raw_vec_unchecked(raw_bytes) + /// }); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: super#encoding + #[inline] + #[must_use] + #[track_caller] + pub unsafe fn from_raw_vec_unchecked(string: Vec) -> Self { + #[cfg(debug_assertions)] + expect_encoded!(raw::validate_bytes(&string)); + + Self(string) + } + + /// Equivalent to [`String::clear`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsString; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let mut raw = RawOsString::new(os_string); + /// raw.clear(); + /// assert!(raw.is_empty()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[inline] + pub fn clear(&mut self) { + self.0.clear(); + } + /// Equivalent to [`String::into_boxed_str`]. /// /// # Examples @@ -902,6 +1158,74 @@ impl RawOsString { pub fn into_string(self) -> Result { String::from_utf8(self.0).map_err(|x| Self(x.into_bytes())) } + + /// Equivalent to [`String::shrink_to_fit`]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let string = "foobar".to_owned(); + /// let mut raw = RawOsString::from_string(string.clone()); + /// raw.shrink_to_fit(); + /// assert_eq!(string, raw); + /// ``` + #[inline] + pub fn shrink_to_fit(&mut self) { + self.0.shrink_to_fit(); + } + + /// Equivalent to [`String::split_off`]. + /// + /// # Panics + /// + /// Panics if the index is not a [valid boundary]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let mut raw = RawOsString::from_string("foobar".to_owned()); + /// assert_eq!("bar", raw.split_off(3)); + /// assert_eq!("foo", raw); + /// ``` + /// + /// [valid boundary]: RawOsStr#indices + #[inline] + #[must_use] + #[track_caller] + pub fn split_off(&mut self, at: usize) -> Self { + self.check_bound(at); + + Self(self.0.split_off(at)) + } + + /// Equivalent to [`String::truncate`]. + /// + /// # Panics + /// + /// Panics if the index is not a [valid boundary]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let mut raw = RawOsString::from_string("foobar".to_owned()); + /// raw.truncate(3); + /// assert_eq!("foo", raw); + /// ``` + /// + /// [valid boundary]: RawOsStr#indices + #[inline] + #[track_caller] + pub fn truncate(&mut self, new_len: usize) { + self.check_bound(new_len); + + self.0.truncate(new_len); + } } impl AsRef for RawOsString { @@ -923,7 +1247,7 @@ impl Deref for RawOsString { #[inline] fn deref(&self) -> &Self::Target { - RawOsStr::from_raw_bytes_unchecked(&self.0) + RawOsStr::from_inner(&self.0) } } @@ -1024,7 +1348,7 @@ macro_rules! r#impl { $(self.check_bound($bound);)+ )? - Self::from_raw_bytes_unchecked(&self.0[idx]) + Self::from_inner(&self.0[idx]) } } diff --git a/src/wasm/mod.rs b/src/wasm/mod.rs index 87622d5..82eb548 100644 --- a/src/wasm/mod.rs +++ b/src/wasm/mod.rs @@ -35,10 +35,12 @@ macro_rules! expect_utf8 { }; } +fn from_bytes(string: &[u8]) -> Result<&str> { + str::from_utf8(string).map_err(EncodingError) +} + pub(super) fn os_str_from_bytes(string: &[u8]) -> Result> { - str::from_utf8(string) - .map(|x| Cow::Borrowed(OsStr::new(x))) - .map_err(EncodingError) + from_bytes(string).map(|x| Cow::Borrowed(OsStr::new(x))) } pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { diff --git a/src/wasm/raw.rs b/src/wasm/raw.rs index 7a7a26a..fb291a6 100644 --- a/src/wasm/raw.rs +++ b/src/wasm/raw.rs @@ -4,6 +4,8 @@ use std::str; pub(crate) use crate::util::is_continuation; +use super::Result; + #[allow(dead_code)] #[path = "../common/raw.rs"] mod common_raw; @@ -12,6 +14,10 @@ pub(crate) use common_raw::starts_with; #[cfg(feature = "uniquote")] pub(crate) use common_raw::uniquote; +pub(crate) fn validate_bytes(string: &[u8]) -> Result<()> { + super::from_bytes(string).map(drop) +} + pub(crate) fn decode_code_point(string: &[u8]) -> u32 { let string = expect_encoded!(str::from_utf8(string)); let mut chars = string.chars(); diff --git a/src/windows/raw.rs b/src/windows/raw.rs index 023eb30..80953de 100644 --- a/src/windows/raw.rs +++ b/src/windows/raw.rs @@ -7,6 +7,11 @@ use super::wtf8; pub(crate) use super::wtf8::ends_with; pub(crate) use super::wtf8::starts_with; use super::wtf8::CodePoints; +use super::Result; + +pub(crate) fn validate_bytes(string: &[u8]) -> Result<()> { + wtf8::encode_wide(string).try_for_each(|x| x.map(drop)) +} pub(crate) fn encode_wide_unchecked( string: &[u8], diff --git a/tests/raw.rs b/tests/raw.rs index d670da6..407b125 100644 --- a/tests/raw.rs +++ b/tests/raw.rs @@ -1,25 +1,15 @@ #![cfg(feature = "raw_os_str")] -use std::ffi::OsStr; - -use os_str_bytes::EncodingError; -use os_str_bytes::OsStrBytes; use os_str_bytes::RawOsStr; mod common; use common::RAW_WTF8_STRING; -fn from_raw_bytes(string: &[u8]) -> Result<&RawOsStr, EncodingError> { - // SAFETY: The string is validated before conversion. - OsStr::from_raw_bytes(string) - .map(|_| unsafe { common::from_raw_bytes_unchecked(string) }) -} - #[test] fn test_ends_with() { #[track_caller] fn test(result: bool, suffix: &[u8]) { - let suffix = from_raw_bytes(suffix).unwrap(); + let suffix = RawOsStr::assert_from_raw_bytes(suffix); assert_eq!(result, RAW_WTF8_STRING.ends_with_os(suffix)); } @@ -62,7 +52,7 @@ fn test_empty_ends_with() { fn test_starts_with() { #[track_caller] fn test(result: bool, prefix: &[u8]) { - let prefix = from_raw_bytes(prefix).unwrap(); + let prefix = RawOsStr::assert_from_raw_bytes(prefix); assert_eq!(result, RAW_WTF8_STRING.starts_with_os(prefix)); }