Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OsStr methods for testing, stripping, and splitting Unicode prefixes. #111059

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 199 additions & 0 deletions library/std/src/ffi/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::fmt;
use crate::hash::{Hash, Hasher};
use crate::ops;
use crate::rc::Rc;
use crate::str::pattern::Pattern;
use crate::str::FromStr;
use crate::sync::Arc;

Expand Down Expand Up @@ -178,6 +179,34 @@ impl OsString {
self.inner.into_string().map_err(|buf| OsString { inner: buf })
}

/// Consumes the `OsString`, dividing it into a leading `String` and a
/// trailing `OsString`.
///
/// The `String` holds the longest leading portion of the original value
/// that is valid Unicode. The `OsString` holds whatever follows that
/// portion, and is empty when the whole value was valid Unicode.
///
/// # Examples
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::OsString;
///
/// let os_string = OsString::from("foo");
/// let (prefix, suffix) = os_string.clone().into_string_split();
///
/// // Gluing the two halves back together yields the original value.
/// let mut rejoined = OsString::from(prefix);
/// rejoined.push(suffix);
/// assert_eq!(rejoined, os_string);
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn into_string_split(self) -> (String, OsString) {
    // The platform buffer performs the actual split; only the remainder
    // needs to be re-wrapped in the public type.
    let (unicode, rest) = self.inner.into_string_split();
    let rest = OsString { inner: rest };
    (unicode, rest)
}

/// Extends the string with the given <code>&[OsStr]</code> slice.
///
/// # Examples
Expand Down Expand Up @@ -703,6 +732,34 @@ impl OsStr {
self.inner.to_str()
}

/// Borrows this `OsStr` as a leading `str` and a trailing `OsStr`.
///
/// The `str` is the longest leading portion of the `OsStr` that is valid
/// Unicode. The `OsStr` is whatever follows that portion, and is empty
/// when the whole value is valid Unicode.
///
/// # Examples
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::{OsStr, OsString};
///
/// let os_str = OsStr::new("foo");
/// let (prefix, suffix) = os_str.to_str_split();
///
/// // Gluing the two halves back together yields the original value.
/// let mut rejoined = OsString::from(prefix);
/// rejoined.push(suffix);
/// assert_eq!(rejoined, os_str);
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn to_str_split(&self) -> (&str, &OsStr) {
    // The platform slice performs the actual split; only the remainder
    // needs to be re-wrapped in the public type.
    let (unicode, rest) = self.inner.to_str_split();
    let rest = Self::from_inner(rest);
    (unicode, rest)
}

/// Converts an `OsStr` to a <code>[Cow]<[str]></code>.
///
/// Any non-Unicode sequences are replaced with
Expand Down Expand Up @@ -978,6 +1035,148 @@ impl OsStr {
pub fn eq_ignore_ascii_case<S: AsRef<OsStr>>(&self, other: S) -> bool {
    // Borrow the argument as an `OsStr`, then let the platform slices do
    // the ASCII-case-insensitive comparison.
    let rhs: &OsStr = other.as_ref();
    self.inner.eq_ignore_ascii_case(&rhs.inner)
}

/// Tests whether this `OsStr` begins with a match of the given pattern.
///
/// Returns `true` on a match and `false` otherwise. The pattern is applied
/// to the leading valid-Unicode portion of the `OsStr`, i.e. the `str`
/// half of the platform slice's `to_str_split`.
///
/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
/// function or closure that determines if a character matches.
///
/// [`char`]: prim@char
/// [pattern]: crate::str::pattern
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::OsString;
///
/// let bananas = OsString::from("bananas");
///
/// assert!(bananas.starts_with("bana"));
/// assert!(!bananas.starts_with("nana"));
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool {
    // Only the Unicode half of the split is relevant to a `str` pattern.
    self.inner.to_str_split().0.starts_with(pattern)
}

/// Tests whether this `OsStr` begins with the given `str`.
///
/// Same as [`OsStr::starts_with`], but is easier to optimize to a
/// direct bitwise comparison.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::OsString;
///
/// let bananas = OsString::from("bananas");
///
/// assert!(bananas.starts_with_str("bana"));
/// assert!(!bananas.starts_with_str("nana"));
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn starts_with_str(&self, prefix: &str) -> bool {
    // The comparison itself lives in the platform-specific slice type.
    let platform_slice = &self.inner;
    platform_slice.starts_with_str(prefix)
}

/// Removes a leading match of the pattern `prefix` from this `OsStr`.
///
/// When the `OsStr` starts with the pattern `prefix`, the part of the
/// `OsStr` following the match is returned, wrapped in `Some`.
///
/// When the `OsStr` does not start with `prefix`, `None` is returned.
///
/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
/// function or closure that determines if a character matches.
///
/// [`char`]: prim@char
/// [pattern]: crate::str::pattern
///
/// # Examples
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::{OsStr, OsString};
///
/// let foobar = OsString::from("foo:bar");
///
/// assert_eq!(foobar.strip_prefix("foo:"), Some(OsStr::new("bar")));
/// assert_eq!(foobar.strip_prefix("bar"), None);
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a OsStr> {
    // The platform slice does the matching; a successful result only needs
    // to be re-wrapped in the public type.
    self.inner.strip_prefix(prefix).map(OsStr::from_inner)
}

/// Returns this `OsStr` with the given `str` prefix removed.
///
/// Same as [`OsStr::strip_prefix`], but is easier to optimize to a
/// direct bitwise comparison.
///
/// If the `OsStr` starts with `prefix`, returns the part after the prefix,
/// wrapped in `Some`. Otherwise returns `None`.
///
/// # Examples
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::{OsStr, OsString};
///
/// let foobar = OsString::from("foo:bar");
///
/// assert_eq!(foobar.strip_prefix_str("foo:"), Some(OsStr::new("bar")));
/// assert_eq!(foobar.strip_prefix_str("bar"), None);
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn strip_prefix_str(&self, prefix: &str) -> Option<&OsStr> {
    // `?` forwards the platform slice's `None` when `prefix` does not match.
    Some(OsStr::from_inner(self.inner.strip_prefix_str(prefix)?))
}

/// Divides this `OsStr` around the first match of `delimiter`.
///
/// On a match, returns the text before the delimiter and the remainder
/// after it, wrapped in `Some`; the delimiter itself is in neither half.
/// Returns `None` when the delimiter is not found.
///
/// The part before the delimiter is returned as a `str`, because a
/// successful `Pattern` match implies that everything preceding it was
/// valid Unicode.
///
/// # Examples
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::{OsStr, OsString};
///
/// let foo = OsString::from("foo:");
/// let foobar = OsString::from("foo:bar");
///
/// assert_eq!(foo.split_once(':'), Some(("foo", OsStr::new(""))));
/// assert_eq!(foobar.split_once(':'), Some(("foo", OsStr::new("bar"))));
/// assert_eq!(foobar.split_once('='), None);
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a OsStr)> {
    // The platform slice finds the delimiter; only the trailing half needs
    // to be re-wrapped in the public type.
    self.inner.split_once(delimiter).map(|(head, tail)| (head, OsStr::from_inner(tail)))
}
}

#[stable(feature = "box_from_os_str", since = "1.17.0")]
Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@
#![feature(needs_panic_runtime)]
#![feature(negative_impls)]
#![feature(never_type)]
#![feature(pattern)]
#![feature(platform_intrinsics)]
#![feature(prelude_import)]
#![feature(rustc_attrs)]
Expand Down
84 changes: 84 additions & 0 deletions library/std/src/sys/unix/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::fmt::Write;
use crate::mem;
use crate::rc::Rc;
use crate::str;
use crate::str::pattern::{Pattern, SearchStep, Searcher};
use crate::sync::Arc;
use crate::sys_common::{AsInner, IntoInner};

Expand Down Expand Up @@ -164,6 +165,27 @@ impl Buf {
String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() })
}

pub fn into_string_split(self) -> (String, Buf) {
let utf8_err = match str::from_utf8(&self.inner) {
Ok(_) => {
// SAFETY: If `str::from_utf8()` succeeds then the input is UTF-8.
let prefix = unsafe { String::from_utf8_unchecked(self.inner) };
return (prefix, Buf { inner: Vec::new() });
}
Err(err) => err,
};
let utf8_len = utf8_err.valid_up_to();
if utf8_len == 0 {
return (String::new(), self);
}
let mut utf8_bytes = self.inner;
let rem_bytes = utf8_bytes.split_off(utf8_len);
// SAFETY: `Utf8Error::valid_up_to()` returns an index up to which
// valid UTF-8 has been verified.
let prefix = unsafe { String::from_utf8_unchecked(utf8_bytes) };
(prefix, Buf { inner: rem_bytes })
}

pub fn push_slice(&mut self, s: &Slice) {
    // Append the raw bytes of `s` to the end of this buffer.
    let bytes: &[u8] = &s.inner;
    self.inner.extend_from_slice(bytes)
}
Expand Down Expand Up @@ -205,6 +227,21 @@ impl Slice {
str::from_utf8(&self.inner).ok()
}

/// Borrows the slice as its longest valid UTF-8 prefix (as a `str`) and
/// the bytes that follow it (as a `Slice`).
pub fn to_str_split(&self) -> (&str, &Slice) {
    match str::from_utf8(&self.inner) {
        // Every byte was valid UTF-8, so nothing is left over.
        Ok(whole) => (whole, Slice::from_u8_slice(b"")),
        Err(err) => match err.valid_up_to() {
            // No valid prefix at all: return this slice unsplit.
            0 => ("", self),
            valid_len => {
                let (head, tail) = self.inner.split_at(valid_len);
                // SAFETY: `Utf8Error::valid_up_to()` returns an index up to
                // which valid UTF-8 has been verified, and `head` is exactly
                // that many leading bytes.
                let head = unsafe { str::from_utf8_unchecked(head) };
                (head, Slice::from_u8_slice(tail))
            }
        },
    }
}

pub fn to_string_lossy(&self) -> Cow<'_, str> {
    // Lossy UTF-8 conversion of the raw bytes via `String::from_utf8_lossy`.
    let bytes: &[u8] = &self.inner;
    String::from_utf8_lossy(bytes)
}
Expand Down Expand Up @@ -269,4 +306,51 @@ impl Slice {
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
    // Compare the raw bytes of both slices, ignoring ASCII case.
    let rhs: &[u8] = &other.inner;
    self.inner.eq_ignore_ascii_case(rhs)
}

/// Returns `true` when the bytes of this slice begin with the bytes of
/// `prefix`.
#[inline]
pub fn starts_with_str(&self, prefix: &str) -> bool {
    let needle = prefix.as_bytes();
    self.inner.starts_with(needle)
}

/// Removes a leading match of the pattern `prefix`, returning the bytes
/// after the match, or `None` when the slice does not start with a match.
///
/// The pattern is run against the valid UTF-8 prefix of this slice only.
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> {
    let (unicode, _) = self.to_str_split();
    let mut searcher = prefix.into_searcher(unicode);

    // Only a match reported by the searcher's very first step, and
    // starting at offset 0, counts as a prefix.
    if let SearchStep::Match(0, matched_len) = searcher.next() {
        // SAFETY: `unicode` is guaranteed to be a prefix of `self.inner`,
        // and `Searcher` is known to return valid indices.
        let rest = unsafe { self.inner.get_unchecked(matched_len..) };
        Some(Slice::from_u8_slice(rest))
    } else {
        None
    }
}

/// Removes the bytes of `prefix` from the front of this slice.
///
/// Returns the remaining bytes wrapped in `Some` when the slice starts
/// with `prefix`, and `None` otherwise.
#[inline]
pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Slice> {
    // `<[u8]>::strip_prefix` performs the same byte-wise prefix test and
    // slicing as a manual `starts_with` + `get_unchecked`, without `unsafe`.
    self.inner.strip_prefix(prefix.as_bytes()).map(Slice::from_u8_slice)
}

/// Divides this slice around the first match of `delimiter`, returning the
/// text before the match and the bytes after it, or `None` when there is
/// no match.
///
/// The delimiter is searched for in the valid UTF-8 prefix of this slice
/// only.
pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Slice)> {
    let (unicode, _) = self.to_str_split();
    let mut searcher = delimiter.into_searcher(unicode);
    let (match_start, match_end) = searcher.next_match()?;

    // SAFETY: `Searcher` is known to return valid indices into `unicode`.
    let before = unsafe { unicode.get_unchecked(..match_start) };
    // SAFETY: `unicode` is guaranteed to be a prefix of `self.inner`, so an
    // index that is valid for `unicode` is also in bounds for `self.inner`.
    let after = unsafe { self.inner.get_unchecked(match_end..) };

    Some((before, Slice::from_u8_slice(after)))
}
}
Loading