Rollup merge of rust-lang#115443 - epage:os_str, r=cuviper

feat(std): Stabilize 'os_str_bytes' feature

Closes rust-lang#111544
matthiaskrgr · Sep 2, 2023 · 43e1561 · 43e1561
2 parents b44bf0d + 30292bb
commit 43e1561
Show file tree

Hide file tree

Showing 17 changed files with 91 additions and 96 deletions.
diff --git a/library/std/src/ffi/mod.rs b/library/std/src/ffi/mod.rs
@@ -132,8 +132,8 @@
 //! On all platforms, [`OsStr`] consists of a sequence of bytes that is encoded as a superset of
 //! UTF-8; see [`OsString`] for more details on its encoding on different platforms.
 //!
-//! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_os_str_bytes`] and
-//! [`OsStr::from_os_str_bytes_unchecked`].
+//! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_encoded_bytes`] and
+//! [`OsStr::from_encoded_bytes_unchecked`].
 //!
 //! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
 //! [Unicode code point]: https://www.unicode.org/glossary/#code_point

diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
@@ -154,36 +154,34 @@ impl OsString {
     /// # Safety
     ///
     /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
-    /// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version
+    /// validated UTF-8 and bytes from [`OsStr::as_encoded_bytes`] from within the same rust version
     /// built for the same target platform.  For example, reconstructing an `OsString` from bytes sent
     /// over the network or stored in a file will likely violate these safety rules.
     ///
-    /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
+    /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_encoded_bytes`] can be
     /// split either immediately before or immediately after any valid non-empty UTF-8 substring.
     ///
     /// # Example
     ///
     /// ```
-    /// #![feature(os_str_bytes)]
-    ///
     /// use std::ffi::OsStr;
     ///
     /// let os_str = OsStr::new("Mary had a little lamb");
-    /// let bytes = os_str.as_os_str_bytes();
+    /// let bytes = os_str.as_encoded_bytes();
     /// let words = bytes.split(|b| *b == b' ');
     /// let words: Vec<&OsStr> = words.map(|word| {
     ///     // SAFETY:
-    ///     // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
+    ///     // - Each `word` only contains content that originated from `OsStr::as_encoded_bytes`
     ///     // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
-    ///     unsafe { OsStr::from_os_str_bytes_unchecked(word) }
+    ///     unsafe { OsStr::from_encoded_bytes_unchecked(word) }
     /// }).collect();
     /// ```
     ///
     /// [conversions]: super#conversions
     #[inline]
-    #[unstable(feature = "os_str_bytes", issue = "111544")]
-    pub unsafe fn from_os_str_bytes_unchecked(bytes: Vec<u8>) -> Self {
-        OsString { inner: Buf::from_os_str_bytes_unchecked(bytes) }
+    #[stable(feature = "os_str_bytes", since = "CURRENT_RUSTC_VERSION")]
+    pub unsafe fn from_encoded_bytes_unchecked(bytes: Vec<u8>) -> Self {
+        OsString { inner: Buf::from_encoded_bytes_unchecked(bytes) }
     }
 
     /// Converts to an [`OsStr`] slice.
@@ -205,7 +203,7 @@ impl OsString {
     }
 
     /// Converts the `OsString` into a byte slice.  To convert the byte slice back into an
-    /// `OsString`, use the [`OsStr::from_os_str_bytes_unchecked`] function.
+    /// `OsString`, use the [`OsString::from_encoded_bytes_unchecked`] function.
     ///
     /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
     /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
@@ -219,9 +217,9 @@ impl OsString {
     ///
     /// [`std::ffi`]: crate::ffi
     #[inline]
-    #[unstable(feature = "os_str_bytes", issue = "111544")]
-    pub fn into_os_str_bytes(self) -> Vec<u8> {
-        self.inner.into_os_str_bytes()
+    #[stable(feature = "os_str_bytes", since = "CURRENT_RUSTC_VERSION")]
+    pub fn into_encoded_bytes(self) -> Vec<u8> {
+        self.inner.into_encoded_bytes()
     }
 
     /// Converts the `OsString` into a [`String`] if it contains valid Unicode data.
@@ -745,36 +743,34 @@ impl OsStr {
     /// # Safety
     ///
     /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
-    /// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version
+    /// validated UTF-8 and bytes from [`OsStr::as_encoded_bytes`] from within the same rust version
     /// built for the same target platform.  For example, reconstructing an `OsStr` from bytes sent
     /// over the network or stored in a file will likely violate these safety rules.
     ///
-    /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
+    /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_encoded_bytes`] can be
     /// split either immediately before or immediately after any valid non-empty UTF-8 substring.
     ///
     /// # Example
     ///
     /// ```
-    /// #![feature(os_str_bytes)]
-    ///
     /// use std::ffi::OsStr;
     ///
     /// let os_str = OsStr::new("Mary had a little lamb");
-    /// let bytes = os_str.as_os_str_bytes();
+    /// let bytes = os_str.as_encoded_bytes();
     /// let words = bytes.split(|b| *b == b' ');
     /// let words: Vec<&OsStr> = words.map(|word| {
     ///     // SAFETY:
-    ///     // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
+    ///     // - Each `word` only contains content that originated from `OsStr::as_encoded_bytes`
     ///     // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
-    ///     unsafe { OsStr::from_os_str_bytes_unchecked(word) }
+    ///     unsafe { OsStr::from_encoded_bytes_unchecked(word) }
     /// }).collect();
     /// ```
     ///
     /// [conversions]: super#conversions
     #[inline]
-    #[unstable(feature = "os_str_bytes", issue = "111544")]
-    pub unsafe fn from_os_str_bytes_unchecked(bytes: &[u8]) -> &Self {
-        Self::from_inner(Slice::from_os_str_bytes_unchecked(bytes))
+    #[stable(feature = "os_str_bytes", since = "CURRENT_RUSTC_VERSION")]
+    pub unsafe fn from_encoded_bytes_unchecked(bytes: &[u8]) -> &Self {
+        Self::from_inner(Slice::from_encoded_bytes_unchecked(bytes))
     }
 
     #[inline]
@@ -948,7 +944,7 @@ impl OsStr {
     }
 
     /// Converts an OS string slice to a byte slice.  To convert the byte slice back into an OS
-    /// string slice, use the [`OsStr::from_os_str_bytes_unchecked`] function.
+    /// string slice, use the [`OsStr::from_encoded_bytes_unchecked`] function.
     ///
     /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
     /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
@@ -962,9 +958,9 @@ impl OsStr {
     ///
     /// [`std::ffi`]: crate::ffi
     #[inline]
-    #[unstable(feature = "os_str_bytes", issue = "111544")]
-    pub fn as_os_str_bytes(&self) -> &[u8] {
-        self.inner.as_os_str_bytes()
+    #[stable(feature = "os_str_bytes", since = "CURRENT_RUSTC_VERSION")]
+    pub fn as_encoded_bytes(&self) -> &[u8] {
+        self.inner.as_encoded_bytes()
     }
 
     /// Converts this string to its ASCII lower case equivalent in-place.
@@ -1270,7 +1266,7 @@ impl Default for &OsStr {
 impl PartialEq for OsStr {
     #[inline]
     fn eq(&self, other: &OsStr) -> bool {
-        self.as_os_str_bytes().eq(other.as_os_str_bytes())
+        self.as_encoded_bytes().eq(other.as_encoded_bytes())
     }
 }
 
@@ -1297,23 +1293,23 @@ impl Eq for OsStr {}
 impl PartialOrd for OsStr {
     #[inline]
     fn partial_cmp(&self, other: &OsStr) -> Option<cmp::Ordering> {
-        self.as_os_str_bytes().partial_cmp(other.as_os_str_bytes())
+        self.as_encoded_bytes().partial_cmp(other.as_encoded_bytes())
     }
     #[inline]
     fn lt(&self, other: &OsStr) -> bool {
-        self.as_os_str_bytes().lt(other.as_os_str_bytes())
+        self.as_encoded_bytes().lt(other.as_encoded_bytes())
     }
     #[inline]
     fn le(&self, other: &OsStr) -> bool {
-        self.as_os_str_bytes().le(other.as_os_str_bytes())
+        self.as_encoded_bytes().le(other.as_encoded_bytes())
     }
     #[inline]
     fn gt(&self, other: &OsStr) -> bool {
-        self.as_os_str_bytes().gt(other.as_os_str_bytes())
+        self.as_encoded_bytes().gt(other.as_encoded_bytes())
     }
     #[inline]
     fn ge(&self, other: &OsStr) -> bool {
-        self.as_os_str_bytes().ge(other.as_os_str_bytes())
+        self.as_encoded_bytes().ge(other.as_encoded_bytes())
     }
 }
 
@@ -1332,7 +1328,7 @@ impl PartialOrd<str> for OsStr {
 impl Ord for OsStr {
     #[inline]
     fn cmp(&self, other: &OsStr) -> cmp::Ordering {
-        self.as_os_str_bytes().cmp(other.as_os_str_bytes())
+        self.as_encoded_bytes().cmp(other.as_encoded_bytes())
     }
 }
 
@@ -1382,7 +1378,7 @@ impl_cmp!(Cow<'a, OsStr>, OsString);
 impl Hash for OsStr {
     #[inline]
     fn hash<H: Hasher>(&self, state: &mut H) {
-        self.as_os_str_bytes().hash(state)
+        self.as_encoded_bytes().hash(state)
     }
 }
 

diff --git a/library/std/src/path.rs b/library/std/src/path.rs
@@ -193,7 +193,7 @@ impl<'a> Prefix<'a> {
     fn len(&self) -> usize {
         use self::Prefix::*;
         fn os_str_len(s: &OsStr) -> usize {
-            s.as_os_str_bytes().len()
+            s.as_encoded_bytes().len()
         }
         match *self {
             Verbatim(x) => 4 + os_str_len(x),
@@ -316,31 +316,31 @@ fn has_physical_root(s: &[u8], prefix: Option<Prefix<'_>>) -> bool {
 
 // basic workhorse for splitting stem and extension
 fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) {
-    if file.as_os_str_bytes() == b".." {
+    if file.as_encoded_bytes() == b".." {
         return (Some(file), None);
     }
 
     // The unsafety here stems from converting between &OsStr and &[u8]
     // and back. This is safe to do because (1) we only look at ASCII
     // contents of the encoding and (2) new &OsStr values are produced
     // only from ASCII-bounded slices of existing &OsStr values.
-    let mut iter = file.as_os_str_bytes().rsplitn(2, |b| *b == b'.');
+    let mut iter = file.as_encoded_bytes().rsplitn(2, |b| *b == b'.');
     let after = iter.next();
     let before = iter.next();
     if before == Some(b"") {
         (Some(file), None)
     } else {
         unsafe {
             (
-                before.map(|s| OsStr::from_os_str_bytes_unchecked(s)),
-                after.map(|s| OsStr::from_os_str_bytes_unchecked(s)),
+                before.map(|s| OsStr::from_encoded_bytes_unchecked(s)),
+                after.map(|s| OsStr::from_encoded_bytes_unchecked(s)),
             )
         }
     }
 }
 
 fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) {
-    let slice = file.as_os_str_bytes();
+    let slice = file.as_encoded_bytes();
     if slice == b".." {
         return (file, None);
     }
@@ -357,8 +357,8 @@ fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) {
     let after = &slice[i + 1..];
     unsafe {
         (
-            OsStr::from_os_str_bytes_unchecked(before),
-            Some(OsStr::from_os_str_bytes_unchecked(after)),
+            OsStr::from_encoded_bytes_unchecked(before),
+            Some(OsStr::from_encoded_bytes_unchecked(after)),
         )
     }
 }
@@ -739,7 +739,7 @@ impl<'a> Components<'a> {
             // separately via `include_cur_dir`
             b".." => Some(Component::ParentDir),
             b"" => None,
-            _ => Some(Component::Normal(unsafe { OsStr::from_os_str_bytes_unchecked(comp) })),
+            _ => Some(Component::Normal(unsafe { OsStr::from_encoded_bytes_unchecked(comp) })),
         }
     }
 
@@ -896,7 +896,7 @@ impl<'a> Iterator for Components<'a> {
                     let raw = &self.path[..self.prefix_len()];
                     self.path = &self.path[self.prefix_len()..];
                     return Some(Component::Prefix(PrefixComponent {
-                        raw: unsafe { OsStr::from_os_str_bytes_unchecked(raw) },
+                        raw: unsafe { OsStr::from_encoded_bytes_unchecked(raw) },
                         parsed: self.prefix.unwrap(),
                     }));
                 }
@@ -968,7 +968,7 @@ impl<'a> DoubleEndedIterator for Components<'a> {
                 State::Prefix if self.prefix_len() > 0 => {
                     self.back = State::Done;
                     return Some(Component::Prefix(PrefixComponent {
-                        raw: unsafe { OsStr::from_os_str_bytes_unchecked(self.path) },
+                        raw: unsafe { OsStr::from_encoded_bytes_unchecked(self.path) },
                         parsed: self.prefix.unwrap(),
                     }));
                 }
@@ -1477,17 +1477,17 @@ impl PathBuf {
     fn _set_extension(&mut self, extension: &OsStr) -> bool {
         let file_stem = match self.file_stem() {
             None => return false,
-            Some(f) => f.as_os_str_bytes(),
+            Some(f) => f.as_encoded_bytes(),
         };
 
         // truncate until right after the file stem
         let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr();
-        let start = self.inner.as_os_str_bytes().as_ptr().addr();
+        let start = self.inner.as_encoded_bytes().as_ptr().addr();
         let v = self.as_mut_vec();
         v.truncate(end_file_stem.wrapping_sub(start));
 
         // add the new extension, if any
-        let new = extension.as_os_str_bytes();
+        let new = extension.as_encoded_bytes();
         if !new.is_empty() {
             v.reserve_exact(new.len() + 1);
             v.push(b'.');
@@ -2007,11 +2007,11 @@ impl Path {
     // The following (private!) function allows construction of a path from a u8
     // slice, which is only safe when it is known to follow the OsStr encoding.
     unsafe fn from_u8_slice(s: &[u8]) -> &Path {
-        unsafe { Path::new(OsStr::from_os_str_bytes_unchecked(s)) }
+        unsafe { Path::new(OsStr::from_encoded_bytes_unchecked(s)) }
     }
     // The following (private!) function reveals the byte encoding used for OsStr.
     fn as_u8_slice(&self) -> &[u8] {
-        self.inner.as_os_str_bytes()
+        self.inner.as_encoded_bytes()
     }
 
     /// Directly wraps a string slice as a `Path` slice.
@@ -2609,7 +2609,7 @@ impl Path {
 
     fn _with_extension(&self, extension: &OsStr) -> PathBuf {
         let self_len = self.as_os_str().len();
-        let self_bytes = self.as_os_str().as_os_str_bytes();
+        let self_bytes = self.as_os_str().as_encoded_bytes();
 
         let (new_capacity, slice_to_copy) = match self.extension() {
             None => {

diff --git a/library/std/src/sys/common/small_c_string.rs b/library/std/src/sys/common/small_c_string.rs
@@ -19,7 +19,7 @@ pub fn run_path_with_cstr<T, F>(path: &Path, f: F) -> io::Result<T>
 where
     F: FnOnce(&CStr) -> io::Result<T>,
 {
-    run_with_cstr(path.as_os_str().as_os_str_bytes(), f)
+    run_with_cstr(path.as_os_str().as_encoded_bytes(), f)
 }
 
 #[inline]

diff --git a/library/std/src/sys/common/tests.rs b/library/std/src/sys/common/tests.rs
@@ -8,7 +8,7 @@ use core::iter::repeat;
 fn stack_allocation_works() {
     let path = Path::new("abc");
     let result = run_path_with_cstr(path, |p| {
-        assert_eq!(p, &*CString::new(path.as_os_str().as_os_str_bytes()).unwrap());
+        assert_eq!(p, &*CString::new(path.as_os_str().as_encoded_bytes()).unwrap());
         Ok(42)
     });
     assert_eq!(result.unwrap(), 42);
@@ -25,7 +25,7 @@ fn heap_allocation_works() {
     let path = repeat("a").take(384).collect::<String>();
     let path = Path::new(&path);
     let result = run_path_with_cstr(path, |p| {
-        assert_eq!(p, &*CString::new(path.as_os_str().as_os_str_bytes()).unwrap());
+        assert_eq!(p, &*CString::new(path.as_os_str().as_encoded_bytes()).unwrap());
         Ok(42)
     });
     assert_eq!(result.unwrap(), 42);

diff --git a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs
@@ -97,12 +97,12 @@ impl AsInner<[u8]> for Buf {
 
 impl Buf {
     #[inline]
-    pub fn into_os_str_bytes(self) -> Vec<u8> {
+    pub fn into_encoded_bytes(self) -> Vec<u8> {
         self.inner
     }
 
     #[inline]
-    pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
+    pub unsafe fn from_encoded_bytes_unchecked(s: Vec<u8>) -> Self {
         Self { inner: s }
     }
 
@@ -203,18 +203,18 @@ impl Buf {
 
 impl Slice {
     #[inline]
-    pub fn as_os_str_bytes(&self) -> &[u8] {
+    pub fn as_encoded_bytes(&self) -> &[u8] {
         &self.inner
     }
 
     #[inline]
-    pub unsafe fn from_os_str_bytes_unchecked(s: &[u8]) -> &Slice {
+    pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice {
         unsafe { mem::transmute(s) }
     }
 
     #[inline]
     pub fn from_str(s: &str) -> &Slice {
-        unsafe { Slice::from_os_str_bytes_unchecked(s.as_bytes()) }
+        unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) }
     }
 
     pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> {