Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure ptr::read gets all the same LLVM load metadata that dereferencing does #109035

Merged
merged 6 commits into from
Mar 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,8 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
sym::likely => (0, vec![tcx.types.bool], tcx.types.bool),
sym::unlikely => (0, vec![tcx.types.bool], tcx.types.bool),

sym::read_via_copy => (1, vec![tcx.mk_imm_ptr(param(0))], param(0)),

sym::discriminant_value => {
let assoc_items = tcx.associated_item_def_ids(
tcx.require_lang_item(hir::LangItem::DiscriminantKind, None),
Expand Down
7 changes: 4 additions & 3 deletions compiler/rustc_lint_defs/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1026,12 +1026,13 @@ declare_lint! {
/// ### Example
///
/// ```rust,compile_fail
/// #![feature(const_ptr_read)]
/// #![feature(const_mut_refs)]
/// const FOO: () = unsafe {
/// let x = &[0_u8; 4];
/// let y = x.as_ptr().cast::<u32>();
/// y.read(); // the address of a `u8` array is unknown and thus we don't know if
/// // it is aligned enough for reading a `u32`.
/// let mut z = 123;
/// y.copy_to_nonoverlapping(&mut z, 1); // the address of a `u8` array is unknown
/// // and thus we don't know if it is aligned enough for copying a `u32`.
/// };
/// ```
///
Expand Down
29 changes: 29 additions & 0 deletions compiler/rustc_mir_transform/src/lower_intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,35 @@ impl<'tcx> MirPass<'tcx> for LowerIntrinsics {
terminator.kind = TerminatorKind::Goto { target };
}
}
sym::read_via_copy => {
let [arg] = args.as_slice() else {
span_bug!(terminator.source_info.span, "Wrong number of arguments");
};
let derefed_place =
if let Some(place) = arg.place() && let Some(local) = place.as_local() {
tcx.mk_place_deref(local.into())
} else {
span_bug!(terminator.source_info.span, "Only passing a local is supported");
};
terminator.kind = match *target {
None => {
// No target means this read something uninhabited,
// so it must be unreachable, and we don't need to
// preserve the assignment either.
TerminatorKind::Unreachable
}
Some(target) => {
block.statements.push(Statement {
source_info: terminator.source_info,
kind: StatementKind::Assign(Box::new((
*destination,
Rvalue::Use(Operand::Copy(derefed_place)),
))),
});
TerminatorKind::Goto { target }
}
}
}
sym::discriminant_value => {
if let (Some(target), Some(arg)) = (*target, args[0].place()) {
let arg = tcx.mk_place_deref(arg);
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,7 @@ symbols! {
read_enum_variant_arg,
read_struct,
read_struct_field,
read_via_copy,
readonly,
realloc,
reason,
Expand Down
10 changes: 10 additions & 0 deletions library/core/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2020,6 +2020,16 @@ extern "rust-intrinsic" {
#[rustc_safe_intrinsic]
pub fn saturating_sub<T: Copy>(a: T, b: T) -> T;

/// This is an implementation detail of [`crate::ptr::read`] and should
/// not be used anywhere else. See its comments for why this exists.
///
/// This intrinsic can *only* be called where the argument is a local without
/// projections (`read_via_copy(p)`, not `read_via_copy(*p)`) so that it
/// trivially obeys runtime-MIR rules about derefs in operands.
#[cfg(not(bootstrap))]
#[rustc_const_unstable(feature = "const_ptr_read", issue = "80377")]
pub fn read_via_copy<T>(p: *const T) -> T;
WaffleLapkin marked this conversation as resolved.
Show resolved Hide resolved

/// Returns the value of the discriminant for the variant in 'v';
/// if `T` has no discriminant, returns `0`.
///
Expand Down
59 changes: 45 additions & 14 deletions library/core/src/ptr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1135,27 +1135,58 @@ pub const unsafe fn replace<T>(dst: *mut T, mut src: T) -> T {
#[rustc_const_unstable(feature = "const_ptr_read", issue = "80377")]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub const unsafe fn read<T>(src: *const T) -> T {
// We are calling the intrinsics directly to avoid function calls in the generated code
// as `intrinsics::copy_nonoverlapping` is a wrapper function.
extern "rust-intrinsic" {
#[rustc_const_stable(feature = "const_intrinsic_copy", since = "1.63.0")]
fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize);
}
// It would be semantically correct to implement this via `copy_nonoverlapping`
// and `MaybeUninit`, as was done before PR #109035. Calling `assume_init`
// provides enough information to know that this is a typed operation.

let mut tmp = MaybeUninit::<T>::uninit();
// SAFETY: the caller must guarantee that `src` is valid for reads.
// `src` cannot overlap `tmp` because `tmp` was just allocated on
// the stack as a separate allocated object.
// However, as of March 2023 the compiler was not capable of taking advantage
// of that information. Thus the implementation here switched to an intrinsic,
// which lowers to `_0 = *src` in MIR, to address a few issues:
//
// Also, since we just wrote a valid value into `tmp`, it is guaranteed
// to be properly initialized.
// - Using `MaybeUninit::assume_init` after a `copy_nonoverlapping` was not
// turning the untyped copy into a typed load. As such, the generated
// `load` in LLVM didn't get various metadata, such as `!range` (#73258),
// `!nonnull`, and `!noundef`, resulting in poorer optimization.
scottmcm marked this conversation as resolved.
Show resolved Hide resolved
// - Going through the extra local resulted in multiple extra copies, even
// in optimized MIR. (Ignoring StorageLive/Dead, the intrinsic is one
// MIR statement, while the previous implementation was eight.) LLVM
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To show my work for these numbers,

With this PR:

    bb0: {
        StorageLive(_2);                 // scope 0 at C:\src\rust\tests\codegen\aaaaaa-mir-dump-demo.rs:9:5: 9:23
        _0 = (*_1);                      // scope 2 at C:\src\rust\library\core\src\ptr\mod.rs:1167:13: 1167:50
        StorageDead(_2);                 // scope 0 at C:\src\rust\tests\codegen\aaaaaa-mir-dump-demo.rs:9:5: 9:23
        return;                          // scope 0 at C:\src\rust\tests\codegen\aaaaaa-mir-dump-demo.rs:10:2: 10:2
    }

Previously:

    bb0: {
        StorageLive(_6);                 // scope 0 at C:\src\rust\tests\codegen\aaaaaa-mir-dump-demo.rs:12:5: 12:27
        StorageLive(_2);                 // scope 2 at C:\src\rust\library\core\src\ptr\mod.rs:1195:17: 1195:24
        StorageLive(_7);                 // scope 5 at C:\src\rust\library\core\src\mem\maybe_uninit.rs:314:31: 314:33
        _2 = MaybeUninit::<T> { uninit: move _7 }; // scope 5 at C:\src\rust\library\core\src\mem\maybe_uninit.rs:314:9: 314:35
        StorageDead(_7);                 // scope 5 at C:\src\rust\library\core\src\mem\maybe_uninit.rs:314:34: 314:35
        StorageLive(_3);                 // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1196:38: 1196:54
        StorageLive(_4);                 // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1196:38: 1196:54
        _4 = &mut _2;                    // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1196:38: 1196:54
        StorageLive(_8);                 // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1196:42: 1196:54
        _8 = &raw mut (*_4);             // scope 6 at C:\src\rust\library\core\src\mem\maybe_uninit.rs:569:9: 569:13
        _3 = _8 as *mut T (PtrToPtr);    // scope 6 at C:\src\rust\library\core\src\mem\maybe_uninit.rs:569:9: 569:33
        StorageDead(_8);                 // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1196:42: 1196:54
        StorageDead(_4);                 // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1196:53: 1196:54
        copy_nonoverlapping(dst = move _3, src = _1, count = const 1_usize); // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1196:13: 1196:58
        StorageDead(_3);                 // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1196:57: 1196:58
        StorageLive(_5);                 // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1197:13: 1197:16
        _5 = move _2;                    // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1197:13: 1197:16
        StorageLive(_9);                 // scope 8 at C:\src\rust\library\core\src\mem\maybe_uninit.rs:627:38: 627:48
        _9 = move (_5.1: std::mem::ManuallyDrop<T>); // scope 8 at C:\src\rust\library\core\src\mem\maybe_uninit.rs:627:38: 627:48
        _0 = move (_9.0: T);             // scope 9 at C:\src\rust\library\core\src\mem\manually_drop.rs:89:9: 89:19
        StorageDead(_9);                 // scope 8 at C:\src\rust\library\core\src\mem\maybe_uninit.rs:627:48: 627:49
        StorageDead(_5);                 // scope 3 at C:\src\rust\library\core\src\ptr\mod.rs:1197:29: 1197:30
        StorageDead(_2);                 // scope 2 at C:\src\rust\library\core\src\ptr\mod.rs:1198:9: 1198:10
        StorageDead(_6);                 // scope 0 at C:\src\rust\tests\codegen\aaaaaa-mir-dump-demo.rs:12:5: 12:27
        return;                          // scope 0 at C:\src\rust\tests\codegen\aaaaaa-mir-dump-demo.rs:13:2: 13:2
    }

That's of course not a semantic problem, as both work fine, but I think it makes a nice demonstration of how this ends up being practically useful, especially in conjunction with MIR inlining.

// could sometimes optimize them away, but because `read` is at the core
// of so many things, not having them in the first place improves what we
// hand off to the backend. For example, `mem::replace::<Big>` previously
// emitted 4 `alloca` and 6 `memcpy`s, but is now 1 `alloc` and 3 `memcpy`s.
// - In general, this approach keeps us from getting any more bugs (like
// #106369) that boil down to "`read(p)` is worse than `*p`", as this
// makes them look identical to the backend (or other MIR consumers).
//
// Future enhancements to MIR optimizations might well allow this to return
// to the previous implementation, rather than using an intrinsic.
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved

// SAFETY: the caller must guarantee that `src` is valid for reads.
unsafe {
assert_unsafe_precondition!(
"ptr::read requires that the pointer argument is aligned and non-null",
[T](src: *const T) => is_aligned_and_not_null(src)
);
copy_nonoverlapping(src, tmp.as_mut_ptr(), 1);
tmp.assume_init()

#[cfg(bootstrap)]
{
// We are calling the intrinsics directly to avoid function calls in the
// generated code as `intrinsics::copy_nonoverlapping` is a wrapper function.
extern "rust-intrinsic" {
#[rustc_const_stable(feature = "const_intrinsic_copy", since = "1.63.0")]
fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize);
}

// `src` cannot overlap `tmp` because `tmp` was just allocated on
// the stack as a separate allocated object.
let mut tmp = MaybeUninit::<T>::uninit();
copy_nonoverlapping(src, tmp.as_mut_ptr(), 1);
tmp.assume_init()
}
#[cfg(not(bootstrap))]
{
crate::intrinsics::read_via_copy(src)
}
scottmcm marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
15 changes: 15 additions & 0 deletions tests/codegen/issues/issue-106369.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// compile-flags: -O
// ignore-debug (the extra assertions get in the way)

#![crate_type = "lib"]

// From <https://github.com/rust-lang/rust/issues/106369#issuecomment-1369095304>

// CHECK-LABEL: @issue_106369(
#[no_mangle]
pub unsafe fn issue_106369(ptr: *const &i32) -> bool {
// CHECK-NOT: icmp
// CHECK: ret i1 true
// CHECK-NOT: icmp
Some(std::ptr::read(ptr)).is_some()
}
38 changes: 38 additions & 0 deletions tests/codegen/issues/issue-73258.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// compile-flags: -O
// ignore-debug (the extra assertions get in the way)

#![crate_type = "lib"]

// Adapted from <https://github.com/rust-lang/rust/issues/73258#issue-637346014>

#[derive(Clone, Copy)]
#[repr(u8)]
pub enum Foo {
A, B, C, D,
}

// CHECK-LABEL: @issue_73258(
#[no_mangle]
pub unsafe fn issue_73258(ptr: *const Foo) -> Foo {
// CHECK-NOT: icmp
// CHECK-NOT: call
// CHECK-NOT: br
// CHECK-NOT: select

// CHECK: %[[R:.+]] = load i8
// CHECK-SAME: !range !

// CHECK-NOT: icmp
// CHECK-NOT: call
// CHECK-NOT: br
// CHECK-NOT: select

// CHECK: ret i8 %[[R]]

// CHECK-NOT: icmp
// CHECK-NOT: call
// CHECK-NOT: br
// CHECK-NOT: select
let k: Option<Foo> = Some(ptr.read());
return k.unwrap();
}
36 changes: 36 additions & 0 deletions tests/codegen/mem-replace-big-type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// This test ensures that `mem::replace::<T>` only ever calls `@llvm.memcpy`
// with `size_of::<T>()` as the size, and never goes through any wrapper that
// may e.g. multiply `size_of::<T>()` with a variable "count" (which is only
// known to be `1` after inlining).

// compile-flags: -C no-prepopulate-passes -Zinline-mir=no
// ignore-debug: the debug assertions get in the way

#![crate_type = "lib"]

#[repr(C, align(8))]
pub struct Big([u64; 7]);
pub fn replace_big(dst: &mut Big, src: Big) -> Big {
// Before the `read_via_copy` intrinsic, this emitted six `memcpy`s.
std::mem::replace(dst, src)
}

// NOTE(eddyb) the `CHECK-NOT`s ensure that the only calls of `@llvm.memcpy` in
// the entire output, are the direct calls we want, from `ptr::replace`.

// CHECK-NOT: call void @llvm.memcpy

// For a large type, we expect exactly three `memcpy`s
// CHECK-LABEL: define internal void @{{.+}}mem{{.+}}replace{{.+}}sret(%Big)
// CHECK-NOT: alloca
// CHECK: alloca %Big
// CHECK-NOT: alloca
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 8 %{{.*}}, {{i8\*|ptr}} align 8 %{{.*}}, i{{.*}} 56, i1 false)
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 8 %{{.*}}, {{i8\*|ptr}} align 8 %{{.*}}, i{{.*}} 56, i1 false)
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 8 %{{.*}}, {{i8\*|ptr}} align 8 %{{.*}}, i{{.*}} 56, i1 false)
// CHECK-NOT: call void @llvm.memcpy

// CHECK-NOT: call void @llvm.memcpy
21 changes: 15 additions & 6 deletions tests/codegen/mem-replace-direct-memcpy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,21 @@ pub fn replace_byte(dst: &mut u8, src: u8) -> u8 {
}

// NOTE(eddyb) the `CHECK-NOT`s ensure that the only calls of `@llvm.memcpy` in
// the entire output, are the two direct calls we want, from `ptr::replace`.
// the entire output, are the direct calls we want, from `ptr::replace`.

// CHECK-NOT: call void @llvm.memcpy
// CHECK: ; core::mem::replace
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 1 %{{.*}}, {{i8\*|ptr}} align 1 %{{.*}}, i{{.*}} 1, i1 false)
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 1 %{{.*}}, {{i8\*|ptr}} align 1 %{{.*}}, i{{.*}} 1, i1 false)

// For a small type, we expect one each of `load`/`store`/`memcpy` instead
// CHECK-LABEL: define internal noundef i8 @{{.+}}mem{{.+}}replace
// CHECK-NOT: alloca
// CHECK: alloca i8
// CHECK-NOT: alloca
// CHECK-NOT: call void @llvm.memcpy
// CHECK: load i8
// CHECK-NOT: call void @llvm.memcpy
// CHECK: store i8
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 1 %{{.*}}, {{i8\*|ptr}} align 1 %{{.*}}, i{{.*}} 1, i1 false)
// CHECK-NOT: call void @llvm.memcpy

// CHECK-NOT: call void @llvm.memcpy
96 changes: 96 additions & 0 deletions tests/codegen/ptr-read-metadata.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// compile-flags: -O -Z merge-functions=disabled
// no-system-llvm
// ignore-debug (the extra assertions get in the way)

#![crate_type = "lib"]

// Ensure that various forms of reading pointers correctly annotate the `load`s
// with `!noundef` and `!range` metadata to enable extra optimization.

use std::mem::MaybeUninit;

// CHECK-LABEL: define noundef i8 @copy_byte(
#[no_mangle]
pub unsafe fn copy_byte(p: *const u8) -> u8 {
// CHECK-NOT: load
// CHECK: load i8, ptr %p, align 1
// CHECK-SAME: !noundef !
// CHECK-NOT: load
*p
}

// CHECK-LABEL: define noundef i8 @read_byte(
#[no_mangle]
pub unsafe fn read_byte(p: *const u8) -> u8 {
// CHECK-NOT: load
// CHECK: load i8, ptr %p, align 1
// CHECK-SAME: !noundef !
// CHECK-NOT: load
p.read()
}

// CHECK-LABEL: define i8 @read_byte_maybe_uninit(
#[no_mangle]
pub unsafe fn read_byte_maybe_uninit(p: *const MaybeUninit<u8>) -> MaybeUninit<u8> {
// CHECK-NOT: load
// CHECK: load i8, ptr %p, align 1
// CHECK-NOT: noundef
// CHECK-NOT: load
p.read()
}

// CHECK-LABEL: define noundef i8 @read_byte_assume_init(
#[no_mangle]
pub unsafe fn read_byte_assume_init(p: &MaybeUninit<u8>) -> u8 {
// CHECK-NOT: load
// CHECK: load i8, ptr %p, align 1
// CHECK-SAME: !noundef !
// CHECK-NOT: load
p.assume_init_read()
}

// CHECK-LABEL: define noundef i32 @copy_char(
#[no_mangle]
pub unsafe fn copy_char(p: *const char) -> char {
// CHECK-NOT: load
// CHECK: load i32, ptr %p
// CHECK-SAME: !range ![[RANGE:[0-9]+]]
// CHECK-SAME: !noundef !
// CHECK-NOT: load
*p
}

// CHECK-LABEL: define noundef i32 @read_char(
#[no_mangle]
pub unsafe fn read_char(p: *const char) -> char {
// CHECK-NOT: load
// CHECK: load i32, ptr %p
// CHECK-SAME: !range ![[RANGE]]
// CHECK-SAME: !noundef !
// CHECK-NOT: load
p.read()
}

// CHECK-LABEL: define i32 @read_char_maybe_uninit(
#[no_mangle]
pub unsafe fn read_char_maybe_uninit(p: *const MaybeUninit<char>) -> MaybeUninit<char> {
// CHECK-NOT: load
// CHECK: load i32, ptr %p
// CHECK-NOT: range
// CHECK-NOT: noundef
// CHECK-NOT: load
p.read()
}

// CHECK-LABEL: define noundef i32 @read_char_assume_init(
#[no_mangle]
pub unsafe fn read_char_assume_init(p: &MaybeUninit<char>) -> char {
// CHECK-NOT: load
// CHECK: load i32, ptr %p
// CHECK-SAME: !range ![[RANGE]]
// CHECK-SAME: !noundef !
// CHECK-NOT: load
p.assume_init_read()
}

// CHECK: ![[RANGE]] = !{i32 0, i32 1114112}
Loading