Skip to content

Commit

Permalink
Merge pull request #236 from artichoke/lopopolo/intern-stacked-borrow…
Browse files Browse the repository at this point in the history
…s-miri-gh-235

Address stacked borrows violation in `SymbolTable::intern`
  • Loading branch information
lopopolo authored Jul 26, 2023
2 parents 6b7568c + 3a31e98 commit e2820d2
Show file tree
Hide file tree
Showing 11 changed files with 362 additions and 33 deletions.
50 changes: 46 additions & 4 deletions src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
//! In general, one should expect this crate's performance on `&[u8]` to be
//! roughly similar to performance on `&str`.

use core::convert::TryInto;
use core::hash::BuildHasher;
use core::iter::{FromIterator, FusedIterator, Zip};
use core::marker::PhantomData;
Expand Down Expand Up @@ -692,23 +691,66 @@ where
if let Some(&id) = self.map.get(&*contents) {
return Ok(id);
}

// The `Interned::Owned` variant is derived from a `Box<T>`. When such
// a structure is moved or assigned, as it is below in the call to
// `self.vec.push`, the allocation is "retagged" in Miri/stacked borrows.
//
// Retagging an allocation pops all of the borrows derived from it off
// of the stack. This means we need to move the `Interned` into the
// `Vec` before calling `Interned::as_static_slice` to ensure the
// reference does not get invalidated by retagging.
//
// However, that alone may be insufficient as the `Interened` may be
// moved when the symbol table grows.
//
// The `SymbolTable` API prevents shared references to the `Interned`
// being invalidated by a retag by tying resolved symbol contents,
// `&'a T`, to `&'a SymbolTable`, which means the `SymbolTable` cannot
// grow, shrink, or otherwise reallocate/move contents while a reference
// to the `Interned`'s inner `T` is alive.
//
// To protect against future updates to stacked borrows or the unsafe
// code operational semantics, we can address this additional invariant
// with updated `Interned` internals which store the `Box<T>` in a raw
// pointer form, which allows moves to be treated as untyped copies.
//
// See:
//
// - <https://github.com/artichoke/intaglio/issues/235>
// - <https://github.com/artichoke/intaglio/pull/236>
let name = Interned::from(contents);
let id = self.map.len().try_into()?;
let id = Symbol::try_from(self.map.len())?;

// Move the `Interned` into the `Vec`, causing it to be retagged under
// stacked borrows, before taking any references to its inner `T`.
self.vec.push(name);
// Ensure we grow the map before we take any shared references to the
// inner `T`.
self.map.reserve(1);

// SAFETY: `self.vec` is non-empty because the preceding line of code
// pushed an entry into it.
let name = unsafe { self.vec.last().unwrap_unchecked() };

// SAFETY: This expression creates a reference with a `'static` lifetime
// from an owned and interned buffer, which is permissible because:
//
// - `Interned` is an internal implementation detail of `SymbolTable`.
// - `SymbolTable` never gives out `'static` references to underlying
// byte string byte contents.
// contents.
// - All slice references given out by the `SymbolTable` have the same
// lifetime as the `SymbolTable`.
// - The `map` field of `SymbolTable`, which contains the `'static`
// references, is dropped before the owned buffers stored in this
// `Interned`.
// - The shared reference may be derived from a `PinBox` which prevents
// moves from retagging the underlying boxed `T` under stacked borrows.
// - The symbol table cannot grow, shrink, or otherwise move its contents
// while this reference is alive.
let slice = unsafe { name.as_static_slice() };

self.map.insert(slice, id);
self.vec.push(name);

debug_assert_eq!(self.get(id), Some(slice));
debug_assert_eq!(self.intern(slice), Ok(id));
Expand Down
50 changes: 46 additions & 4 deletions src/cstr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
//! [`CString`]: std::ffi::CString
//! [`&CStr`]: std::ffi::CStr

use core::convert::TryInto;
use core::hash::BuildHasher;
use core::iter::{FromIterator, FusedIterator, Zip};
use core::marker::PhantomData;
Expand Down Expand Up @@ -715,23 +714,66 @@ where
if let Some(&id) = self.map.get(&*contents) {
return Ok(id);
}

// The `Interned::Owned` variant is derived from a `Box<T>`. When such
// a structure is moved or assigned, as it is below in the call to
// `self.vec.push`, the allocation is "retagged" in Miri/stacked borrows.
//
// Retagging an allocation pops all of the borrows derived from it off
// of the stack. This means we need to move the `Interned` into the
// `Vec` before calling `Interned::as_static_slice` to ensure the
// reference does not get invalidated by retagging.
//
// However, that alone may be insufficient as the `Interened` may be
// moved when the symbol table grows.
//
// The `SymbolTable` API prevents shared references to the `Interned`
// being invalidated by a retag by tying resolved symbol contents,
// `&'a T`, to `&'a SymbolTable`, which means the `SymbolTable` cannot
// grow, shrink, or otherwise reallocate/move contents while a reference
// to the `Interned`'s inner `T` is alive.
//
// To protect against future updates to stacked borrows or the unsafe
// code operational semantics, we can address this additional invariant
// with updated `Interned` internals which store the `Box<T>` in a raw
// pointer form, which allows moves to be treated as untyped copies.
//
// See:
//
// - <https://github.com/artichoke/intaglio/issues/235>
// - <https://github.com/artichoke/intaglio/pull/236>
let name = Interned::from(contents);
let id = self.map.len().try_into()?;
let id = Symbol::try_from(self.map.len())?;

// Move the `Interned` into the `Vec`, causing it to be retagged under
// stacked borrows, before taking any references to its inner `T`.
self.vec.push(name);
// Ensure we grow the map before we take any shared references to the
// inner `T`.
self.map.reserve(1);

// SAFETY: `self.vec` is non-empty because the preceding line of code
// pushed an entry into it.
let name = unsafe { self.vec.last().unwrap_unchecked() };

// SAFETY: This expression creates a reference with a `'static` lifetime
// from an owned and interned buffer, which is permissible because:
//
// - `Interned` is an internal implementation detail of `SymbolTable`.
// - `SymbolTable` never gives out `'static` references to underlying
// C string byte contents.
// contents.
// - All slice references given out by the `SymbolTable` have the same
// lifetime as the `SymbolTable`.
// - The `map` field of `SymbolTable`, which contains the `'static`
// references, is dropped before the owned buffers stored in this
// `Interned`.
// - The shared reference may be derived from a `PinBox` which prevents
// moves from retagging the underlying boxed `T` under stacked borrows.
// - The symbol table cannot grow, shrink, or otherwise move its contents
// while this reference is alive.
let slice = unsafe { name.as_static_slice() };

self.map.insert(slice, id);
self.vec.push(name);

debug_assert_eq!(self.get(id), Some(slice));
debug_assert_eq!(self.intern(slice), Ok(id));
Expand Down
129 changes: 117 additions & 12 deletions src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ use std::ffi::{OsStr, OsString};
#[cfg(feature = "path")]
use std::path::{Path, PathBuf};

use self::boxed::PinBox;

/// Wrapper around `&'static` slices that does not allow mutable access to the
/// inner slice.
pub struct Interned<T: 'static + ?Sized>(Slice<T>);
Expand Down Expand Up @@ -59,7 +61,7 @@ where
{
/// Return a reference to the inner slice.
#[inline]
pub const fn as_slice(&self) -> &T {
pub fn as_slice(&self) -> &T {
self.0.as_slice()
}

Expand Down Expand Up @@ -127,7 +129,7 @@ enum Slice<T: 'static + ?Sized> {
/// True `'static` references.
Static(&'static T),
/// Owned `'static` references.
Owned(Box<T>),
Owned(PinBox<T>),
}

impl<T> From<&'static T> for Slice<T>
Expand All @@ -143,7 +145,7 @@ where
impl From<String> for Slice<str> {
#[inline]
fn from(owned: String) -> Self {
Self::Owned(owned.into_boxed_str())
Self::Owned(PinBox::new(owned.into_boxed_str()))
}
}

Expand All @@ -161,7 +163,7 @@ impl From<Cow<'static, str>> for Slice<str> {
impl From<Vec<u8>> for Slice<[u8]> {
#[inline]
fn from(owned: Vec<u8>) -> Self {
Self::Owned(owned.into_boxed_slice())
Self::Owned(PinBox::new(owned.into_boxed_slice()))
}
}

Expand All @@ -180,7 +182,7 @@ impl From<Cow<'static, [u8]>> for Slice<[u8]> {
impl From<CString> for Slice<CStr> {
#[inline]
fn from(owned: CString) -> Self {
Self::Owned(owned.into_boxed_c_str())
Self::Owned(PinBox::new(owned.into_boxed_c_str()))
}
}

Expand All @@ -199,7 +201,7 @@ impl From<Cow<'static, CStr>> for Slice<CStr> {
impl From<OsString> for Slice<OsStr> {
#[inline]
fn from(owned: OsString) -> Self {
Self::Owned(owned.into_boxed_os_str())
Self::Owned(PinBox::new(owned.into_boxed_os_str()))
}
}

Expand All @@ -218,7 +220,7 @@ impl From<Cow<'static, OsStr>> for Slice<OsStr> {
impl From<PathBuf> for Slice<Path> {
#[inline]
fn from(owned: PathBuf) -> Self {
Self::Owned(owned.into_boxed_path())
Self::Owned(PinBox::new(owned.into_boxed_path()))
}
}

Expand All @@ -239,10 +241,13 @@ where
{
/// Return a reference to the inner slice.
#[inline]
const fn as_slice(&self) -> &T {
fn as_slice(&self) -> &T {
match self {
Self::Static(slice) => slice,
Self::Owned(owned) => owned,
Self::Owned(owned) => {
// SAFETY: `PinBox` acts like `Box`.
unsafe { owned.as_ref() }
}
}
}

Expand All @@ -258,8 +263,6 @@ where
match self {
Self::Static(slice) => slice,
Self::Owned(owned) => {
// Coerce the `Box<T>` to a pointer.
let ptr: *const T = &**owned;
// SAFETY: This expression creates a reference with a `'static`
// lifetime from an owned buffer, which is permissible because:
//
Expand All @@ -270,9 +273,10 @@ where
// - The `map` field of the various symbol tables which contains
// the `'static` references, is dropped before the owned buffers
// stored in this `Slice`.
// - `PinBox` acts like `Box`.
unsafe {
// Coerce the pointer to a `&'static T`.
&*ptr
owned.as_ref()
}
}
}
Expand Down Expand Up @@ -354,6 +358,107 @@ impl fmt::Debug for Slice<Path> {
}
}

/// An abstraction over a `Box<T>` where T is an unsized slice type which moves
/// the box by raw pointer. This type is required to satisfy Miri with
/// `-Zmiri-retag-fields`. See #235, #236.
///
/// The `PinBox` type is derived from:
///
/// - <https://github.com/CAD97/simple-interner/blob/24a836e9f8a0173faf48438d711442c2a86659c1/src/interner.rs#L26-L56>
/// - <https://github.com/artichoke/intaglio/pull/236#issuecomment-1651058752>
/// - <https://github.com/artichoke/intaglio/pull/236#issuecomment-1652003240>
///
/// This code is placed into the public domain by @CAD97:
///
/// - <https://github.com/artichoke/intaglio/pull/236#issuecomment-1652393974>
mod boxed {
use core::fmt;
use core::marker::PhantomData;
use core::ptr::NonNull;

/// A wrapper around box that does not provide &mut access to the pointee and
/// uses raw-pointer borrowing rules to avoid invalidating extant references.
///
/// The resolved reference is guaranteed valid until the `PinBox` is dropped.
///
/// This type is meant to allow the owned data in the given `Box<T>` to be moved
/// without being retagged by Miri. See #235, #236.
pub(crate) struct PinBox<T: ?Sized> {
ptr: NonNull<T>,
_marker: PhantomData<Box<T>>,
}

impl<T: ?Sized> PinBox<T> {
#[inline]
pub(crate) fn new(x: Box<T>) -> Self {
let ptr = Box::into_raw(x);
// SAFETY: `ptr` is derived from `Box::into_raw` and can never be null.
let ptr = unsafe { NonNull::new_unchecked(ptr) };
Self {
ptr,
_marker: PhantomData,
}
}

#[inline]
pub(crate) unsafe fn as_ref<'a>(&self) -> &'a T {
// SAFETY: `PinBox` acts like `Box`, `self.ptr` is non-null and points
// to a live `Box`.
unsafe { self.ptr.as_ref() }
}
}

impl<T: ?Sized> Drop for PinBox<T> {
fn drop(&mut self) {
// SAFETY: `PinBox` acts like `Box`.
unsafe {
drop(Box::from_raw(self.ptr.as_ptr()));
}
}
}

impl<T: ?Sized + fmt::Debug> fmt::Debug for PinBox<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// SAFETY: `PinBox` acts like `Box`.
let s = unsafe { self.as_ref() };
s.fmt(f)
}
}

#[cfg(test)]
mod tests {
use core::fmt::Write;

use super::PinBox;

#[test]
fn test_drop() {
let x = "abc".to_string().into_boxed_str();
let x = PinBox::new(x);
drop(x);
}

#[test]
fn test_as_ref() {
let x = "abc".to_string().into_boxed_str();
let x = PinBox::new(x);

// SAFETY: `PinBox` acts like `Box` and contains a valid pointer.
assert_eq!(unsafe { x.as_ref() }, "abc");
}

#[test]
fn test_debug_format() {
let x = "abc".to_string().into_boxed_str();
let x = PinBox::new(x);

let mut buf = String::new();
write!(&mut buf, "{x:?}").unwrap();
assert_eq!(buf, "\"abc\"");
}
}
}

#[cfg(test)]
mod tests {
use core::fmt::Write;
Expand Down
Loading

0 comments on commit e2820d2

Please sign in to comment.