Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add runtime validity checks inside MaybeUninit::assume_init #98073

Closed
wants to merge 12 commits into from
5 changes: 5 additions & 0 deletions compiler/rustc_codegen_cranelift/src/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@ pub(crate) fn codegen_const_value<'tcx>(
.iconst(fx.pointer_type, i64::try_from(end.checked_sub(start).unwrap()).unwrap());
CValue::by_val_pair(ptr, len, layout)
}
ConstValue::CustomSlice { data, length } => {
let ptr = pointer_for_allocation(fx, data).get_addr(fx);
let len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(length).unwrap());
CValue::by_val_pair(ptr, len, layout)
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ fn codegen_regular_intrinsic_call<'tcx>(
dest.write_cvalue(fx, val);
};

pref_align_of | needs_drop | type_id | type_name | variant_count, () {
pref_align_of | needs_drop | type_id | type_name | variant_count | validity_invariants_of, () {
let const_val =
fx.tcx.const_eval_instance(ParamEnv::reveal_all(), instance, None).unwrap();
let val = crate::constant::codegen_const_value(
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
| sym::needs_drop
| sym::type_id
| sym::type_name
| sym::variant_count => {
| sym::variant_count
| sym::validity_invariants_of => {
let value = bx
.tcx()
.const_eval_instance(ty::ParamEnv::reveal_all(), instance, None)
Expand Down
16 changes: 16 additions & 0 deletions compiler/rustc_codegen_ssa/src/mir/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,22 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
let b_llval = bx.const_usize((end - start) as u64);
OperandValue::Pair(a_llval, b_llval)
}
ConstValue::CustomSlice { data, length } => {
let Abi::ScalarPair(a_scalar, _) = layout.abi else {
bug!("from_const: invalid ScalarPair layout: {:#?}", layout);
};
let a = Scalar::from_pointer(
Pointer::new(bx.tcx().create_memory_alloc(data), Size::ZERO),
&bx.tcx(),
);
let a_llval = bx.scalar_to_backend(
a,
a_scalar,
bx.scalar_pair_element_backend_type(layout, 0, true),
);
let b_llval = bx.const_usize(length as u64);
OperandValue::Pair(a_llval, b_llval)
}
ConstValue::ByRef { alloc, offset } => {
return bx.load_operand(bx.from_const_alloc(layout, alloc, offset));
}
Expand Down
26 changes: 26 additions & 0 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use std::convert::TryFrom;

use rustc_hir::def_id::DefId;
use rustc_hir::lang_items::LangItem;
use rustc_middle::mir::{
self,
interpret::{ConstValue, GlobalId, InterpResult, Scalar},
Expand All @@ -16,6 +17,7 @@ use rustc_middle::ty::subst::SubstsRef;
use rustc_middle::ty::{Ty, TyCtxt};
use rustc_span::symbol::{sym, Symbol};
use rustc_target::abi::{Abi, Align, InitKind, Primitive, Size};
use rustc_target::spec::SanitizerSet;

use super::{
util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx, Machine, OpTy, PlaceTy,
Expand All @@ -24,6 +26,7 @@ use super::{

mod caller_location;
mod type_name;
mod validity_invariants_of;

fn numeric_intrinsic<Tag>(name: Symbol, bits: u128, kind: Primitive) -> Scalar<Tag> {
let size = match kind {
Expand Down Expand Up @@ -103,6 +106,23 @@ pub(crate) fn eval_nullary_intrinsic<'tcx>(
| ty::Tuple(_)
| ty::Error(_) => ConstValue::from_machine_usize(0u64, &tcx),
},
sym::validity_invariants_of => {
let msan = tcx.sess.opts.debugging_opts.sanitizer.contains(SanitizerSet::MEMORY);
let disable = tcx.sess.opts.debugging_opts.no_validity_invariant_checks;

let strictness = if disable {
validity_invariants_of::InvariantStrictness::Disable
} else if msan {
validity_invariants_of::InvariantStrictness::All
} else {
validity_invariants_of::InvariantStrictness::Normal
};

ensure_monomorphic_enough(tcx, tp_ty)?;
let (data, length) =
validity_invariants_of::alloc_validity_invariants_of(tcx, tp_ty, strictness);
ConstValue::CustomSlice { data, length }
}
other => bug!("`{}` is not a zero arg intrinsic", other),
})
}
Expand Down Expand Up @@ -162,13 +182,19 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
| sym::needs_drop
| sym::type_id
| sym::type_name
| sym::validity_invariants_of
| sym::variant_count => {
let gid = GlobalId { instance, promoted: None };
let ty = match intrinsic_name {
sym::pref_align_of | sym::variant_count => self.tcx.types.usize,
sym::needs_drop => self.tcx.types.bool,
sym::type_id => self.tcx.types.u64,
sym::type_name => self.tcx.mk_static_str(),
sym::validity_invariants_of => {
let item = self.tcx.require_lang_item(LangItem::ValidityInvariant, None);
let ty = self.tcx.type_of(item);
self.tcx.mk_imm_ref(self.tcx.lifetimes.re_static, self.tcx.mk_slice(ty))
}
_ => bug!(),
};
let val =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
use rustc_data_structures::fx::FxHashMap;
use rustc_hir::lang_items::LangItem;
use rustc_middle::mir::interpret::{AllocRange, Allocation, ConstAllocation, Scalar as MirScalar};
use rustc_middle::mir::Mutability;
use rustc_middle::ty::layout::LayoutCx;
use rustc_middle::ty::{ParamEnv, ParamEnvAnd};
use rustc_middle::ty::{Ty, TyCtxt};
use rustc_target::abi::{
Abi, FieldsShape, HasDataLayout, Integer, Primitive, Scalar, Size, TyAndLayout, Variants,
WrappingRange,
};

/// Width class of the scalar that a single validity invariant constrains.
///
/// The discriminant is cast to `u8` and written into the size field of each
/// emitted invariant record, so the numeric values are spelled out here.
// NOTE(review): presumably these values must agree with the size encoding of
// the `ValidityInvariant` lang item on the library side — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum InvariantSize {
    /// 8-bit scalar.
    U8 = 0,
    /// 16-bit scalar.
    U16 = 1,
    /// 32-bit scalar (also used for `f32`).
    U32 = 2,
    /// 64-bit scalar (also used for `f64`).
    U64 = 3,
    /// 128-bit scalar.
    U128 = 4,
    /// Pointer-sized scalar.
    Pointer = 5,
}

/// Identifies one invariant slot: a scalar of a given width at a given offset.
///
/// Used as the map key while collecting invariants, so at most one valid
/// range is recorded per `(offset, size)` pair.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct InvariantKey {
// Offset of the scalar, accumulated from the start of the outermost type.
offset: Size,
// Width class of the scalar located at `offset`.
size: InvariantSize,
}

/// Recursively collects the validity invariants of `ty` into `invs`.
///
/// * `ty` - the type to inspect; its layout is computed under
///   `ParamEnv::reveal_all()`.
/// * `invs` - the accumulator, keyed by `(offset, size)`. The first range
///   recorded for a key is kept and later ones are ignored.
/// * `offset` - offset of `ty` within the outermost type being inspected.
/// * `strictness` - `Disable` records nothing, `Normal` records only scalars
///   whose valid range is not the full range, `All` records every initialized
///   scalar.
///
/// Types whose layout cannot be computed contribute no invariants.
fn add_invariants<'tcx>(
    tcx: TyCtxt<'tcx>,
    ty: Ty<'tcx>,
    invs: &mut FxHashMap<InvariantKey, WrappingRange>,
    offset: Size,
    strictness: InvariantStrictness,
) {
    if strictness == InvariantStrictness::Disable {
        return;
    }

    let param_env = ParamEnv::reveal_all();
    let layout = match tcx.layout_of(ParamEnvAnd { param_env, value: ty }) {
        Ok(layout) => layout,
        // No layout, no invariants.
        Err(_) => return,
    };

    if let Abi::Scalar(Scalar::Initialized { value, valid_range }) = layout.layout.abi() {
        let size = match value {
            Primitive::Int(Integer::I8, _) => InvariantSize::U8,
            Primitive::Int(Integer::I16, _) => InvariantSize::U16,
            Primitive::Int(Integer::I32, _) => InvariantSize::U32,
            Primitive::Int(Integer::I64, _) => InvariantSize::U64,
            Primitive::Int(Integer::I128, _) => InvariantSize::U128,
            Primitive::F32 => InvariantSize::U32,
            Primitive::F64 => InvariantSize::U64,
            Primitive::Pointer => InvariantSize::Pointer,
        };

        if !valid_range.is_full_for(value.size(&tcx)) || strictness == InvariantStrictness::All {
            // Keep the first scalar seen for this slot. Outer types are visited
            // before their fields, so `NonZeroU8(u8)` ends up with only one
            // invariant, the stricter one.
            invs.entry(InvariantKey { offset, size }).or_insert(valid_range);
        }
    }

    if !matches!(layout.layout.variants(), Variants::Single { .. }) {
        // We *don't* want to look for fields inside enums.
        return;
    }

    let layout_cx = LayoutCx { tcx, param_env };

    match layout.layout.fields() {
        // Nothing to recurse into for primitives; union fields are not inspected.
        FieldsShape::Primitive | FieldsShape::Union(_) => {}
        FieldsShape::Array { stride, count } => {
            // We may wish to bail out if we're generating too many invariants.
            // That would lead to false negatives, though.
            for idx in 0..*count {
                let elem_offset = offset + *stride * idx;
                let elem = layout.field(&layout_cx, idx as usize);
                add_invariants(tcx, elem.ty, invs, elem_offset, strictness);
            }
        }
        FieldsShape::Arbitrary { offsets, .. } => {
            for (idx, &field_offset) in offsets.iter().enumerate() {
                let field = layout.field(&layout_cx, idx);
                // Some types contain themselves as fields, such as `&mut [T]`.
                // The easy solution is to just not recurse into those.
                if field.ty != ty {
                    add_invariants(tcx, field.ty, invs, offset + field_offset, strictness);
                }
            }
        }
    }
}

/// Looks up the `ValidityInvariant` lang item and computes its layout under
/// `ParamEnv::reveal_all()`.
///
/// # Panics
///
/// Panics if the layout of the lang item cannot be computed.
fn get_layout_of_invariant<'tcx>(tcx: TyCtxt<'tcx>) -> TyAndLayout<'tcx, Ty<'tcx>> {
    let invariant_def = tcx.require_lang_item(LangItem::ValidityInvariant, None);
    let invariant_ty = tcx.type_of(invariant_def);
    tcx.layout_of(ParamEnv::reveal_all().and(invariant_ty))
        .expect("invalid layout for ValidityInvariant lang item")
}

/// Controls how many validity invariants are collected for a type.
#[derive(PartialEq, Clone, Copy, Eq)]
pub(crate) enum InvariantStrictness {
// Collect nothing: `add_invariants` returns immediately.
Disable,
// Collect only scalars whose valid range is not the full range for their size.
Normal,
// Collect every initialized scalar, even when its valid range is full.
All,
}

/// Builds an interned, immutable allocation holding the validity invariants of
/// `ty`, laid out as consecutive `ValidityInvariant` records.
///
/// Returns the allocation together with the number of records it contains.
/// Each record is filled field by field, using the field offsets of the
/// `ValidityInvariant` lang item: field 0 receives the offset (pointer-sized),
/// field 1 the size tag (one byte), and fields 2 and 3 the start and end of the
/// valid range (16 bytes each).
pub(crate) fn alloc_validity_invariants_of<'tcx>(
    tcx: TyCtxt<'tcx>,
    ty: Ty<'tcx>,
    strictness: InvariantStrictness,
) -> (ConstAllocation<'tcx>, usize) {
    let mut invs = FxHashMap::default();
    add_invariants(tcx, ty, &mut invs, Size::ZERO, strictness);
    let count = invs.len();

    let data_layout = tcx.data_layout();
    let record = get_layout_of_invariant(tcx);
    let record_size = record.layout.size();

    let mut alloc =
        Allocation::uninit(record_size * count as u64, record.layout.align().abi, true).unwrap();

    // Offsets of the four record fields, relative to the start of a record.
    let fields = record.layout.fields();
    let (offset_field, size_field, start_field, end_field) =
        (fields.offset(0), fields.offset(1), fields.offset(2), fields.offset(3));

    let byte = Size::from_bytes(1);
    let sixteen_bytes = Size::from_bytes(16);

    for (idx, (key, valid_range)) in invs.iter().enumerate() {
        let base = idx as u64 * record_size;

        alloc
            .write_scalar(
                &tcx,
                AllocRange { start: base + offset_field, size: data_layout.pointer_size },
                MirScalar::from_machine_usize(key.offset.bytes(), &tcx).into(),
            )
            .unwrap();

        alloc
            .write_scalar(
                &tcx,
                AllocRange { start: base + size_field, size: byte },
                MirScalar::from_u8(key.size as u8).into(),
            )
            .unwrap();

        alloc
            .write_scalar(
                &tcx,
                AllocRange { start: base + start_field, size: sixteen_bytes },
                MirScalar::from_u128(valid_range.start).into(),
            )
            .unwrap();

        alloc
            .write_scalar(
                &tcx,
                AllocRange { start: base + end_field, size: sixteen_bytes },
                MirScalar::from_u128(valid_range.end).into(),
            )
            .unwrap();
    }

    // Mutability was only needed for `write_scalar`; freeze before interning.
    alloc.mutability = Mutability::Not;

    (tcx.intern_const_alloc(alloc), count)
}
13 changes: 13 additions & 0 deletions compiler/rustc_const_eval/src/interpret/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,19 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
self,
))
}
ConstValue::CustomSlice { data, length } => {
// We rely on mutability being set correctly in `data` to prevent writes
// where none should happen.
let ptr = Pointer::new(
self.tcx.create_memory_alloc(data),
Size::ZERO, // offset: 0
);
Operand::Immediate(Immediate::new_slice(
Scalar::from_pointer(self.global_base_pointer(ptr)?, &*self.tcx),
u64::try_from(length).unwrap(),
self,
))
}
};
Ok(OpTy { op, layout })
}
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_hir/src/lang_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ language_item_table! {
Range, sym::Range, range_struct, Target::Struct, GenericRequirement::None;
RangeToInclusive, sym::RangeToInclusive, range_to_inclusive_struct, Target::Struct, GenericRequirement::None;
RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None;
ValidityInvariant, sym::ValidityInvariant, validity_invariant_struct, Target::Struct, GenericRequirement::None;
}

pub enum GenericRequirement {
Expand Down
15 changes: 14 additions & 1 deletion compiler/rustc_middle/src/mir/interpret/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ pub enum ConstValue<'tcx> {
/// Used only for `&[u8]` and `&str`
Slice { data: ConstAllocation<'tcx>, start: usize, end: usize },

/// Like `Slice`, but for slices whose element type is not one byte wide;
/// `length` is the number of elements, not bytes.
CustomSlice { data: ConstAllocation<'tcx>, length: usize },
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unclear to me why Slice cannot be adapted for this, but my opinion here is probably not worth much since I don’t fiddle much around mir/interpret.


/// A value not represented/representable by `Scalar` or `Slice`
ByRef {
/// The backing memory of the value, may contain more memory than needed for just the value
Expand All @@ -61,6 +64,9 @@ impl<'a, 'tcx> Lift<'tcx> for ConstValue<'a> {
ConstValue::ByRef { alloc, offset } => {
ConstValue::ByRef { alloc: tcx.lift(alloc)?, offset }
}
ConstValue::CustomSlice { data, length } => {
ConstValue::CustomSlice { data: tcx.lift(data)?, length }
}
})
}
}
Expand All @@ -69,7 +75,9 @@ impl<'tcx> ConstValue<'tcx> {
#[inline]
pub fn try_to_scalar(&self) -> Option<Scalar<AllocId>> {
match *self {
ConstValue::ByRef { .. } | ConstValue::Slice { .. } => None,
ConstValue::ByRef { .. }
| ConstValue::Slice { .. }
| ConstValue::CustomSlice { .. } => None,
ConstValue::Scalar(val) => Some(val),
}
}
Expand Down Expand Up @@ -258,6 +266,11 @@ impl<Tag> Scalar<Tag> {
Scalar::Int(i.into())
}

#[inline]
pub fn from_u128(i: u128) -> Self {
Scalar::Int(i.into())
}

#[inline]
pub fn from_machine_usize(i: u64, cx: &impl HasDataLayout) -> Self {
Self::from_uint(i, cx.data_layout().pointer_size)
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_middle/src/mir/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ impl<'tcx> Visitor<'tcx> for ExtraComments<'tcx> {
let fmt_val = |val: &ConstValue<'tcx>| match val {
ConstValue::Scalar(s) => format!("Scalar({:?})", s),
ConstValue::Slice { .. } => format!("Slice(..)"),
ConstValue::CustomSlice { .. } => format!("CustomSlice(..)"),
ConstValue::ByRef { .. } => format!("ByRef(..)"),
};

Expand Down Expand Up @@ -679,7 +680,9 @@ pub fn write_allocations<'tcx>(
ConstValue::Scalar(interpret::Scalar::Int { .. }) => {
Either::Left(Either::Right(std::iter::empty()))
}
ConstValue::ByRef { alloc, .. } | ConstValue::Slice { data: alloc, .. } => {
ConstValue::ByRef { alloc, .. }
| ConstValue::Slice { data: alloc, .. }
| ConstValue::CustomSlice { data: alloc, .. } => {
Either::Right(alloc_ids_from_alloc(alloc))
}
}
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1380,6 +1380,8 @@ options! {
"run LLVM in non-parallel mode (while keeping codegen-units and ThinLTO)"),
no_unique_section_names: bool = (false, parse_bool, [TRACKED],
"do not use unique names for text and data sections when -Z function-sections is used"),
no_validity_invariant_checks: bool = (false, parse_bool, [TRACKED],
"do not generate any validity invariants in the validity_invariants_of intrinsic"),
no_profiler_runtime: bool = (false, parse_no_flag, [TRACKED],
"prevent automatic injection of the profiler_builtins crate"),
normalize_docs: bool = (false, parse_bool, [TRACKED],
Expand Down
Loading