diff --git a/Cargo.lock b/Cargo.lock
index a1a02c65d9483..232c9c9441fb4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -24,6 +24,17 @@ dependencies = [
  "rustc-std-workspace-core",
 ]
 
+[[package]]
+name = "aes"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2"
+dependencies = [
+ "cfg-if",
+ "cipher",
+ "cpufeatures",
+]
+
 [[package]]
 name = "ahash"
 version = "0.8.3"
@@ -470,6 +481,16 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "cipher"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
+dependencies = [
+ "crypto-common",
+ "inout",
+]
+
 [[package]]
 name = "clap"
 version = "4.4.4"
@@ -1984,6 +2005,15 @@ version = "1.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306"
 
+[[package]]
+name = "inout"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
+dependencies = [
+ "generic-array",
+]
+
 [[package]]
 name = "installer"
 version = "0.0.0"
@@ -2465,6 +2495,7 @@ dependencies = [
 name = "miri"
 version = "0.1.0"
 dependencies = [
+ "aes",
  "colored",
  "ctrlc",
  "env_logger 0.10.0",
diff --git a/src/tools/miri/.github/workflows/ci.yml b/src/tools/miri/.github/workflows/ci.yml
index f026b7fd10452..67b48a3742da0 100644
--- a/src/tools/miri/.github/workflows/ci.yml
+++ b/src/tools/miri/.github/workflows/ci.yml
@@ -188,7 +188,7 @@ jobs:
         with:
           fetch-depth: 256 # get a bit more of the history
       - name: install josh-proxy
-        run: cargo +stable install josh-proxy --git https://github.com/josh-project/josh --tag r22.12.06
+        run: RUSTFLAGS="--cap-lints warn" cargo +stable install josh-proxy --git https://github.com/josh-project/josh --tag r22.12.06
       - name: setup bot git name and email
         run: |
           git config --global user.name 'The Miri Conjob Bot'
@@ -208,7 +208,7 @@ jobs:
           git push -u origin $BRANCH
       - name: Create Pull Request
         run: |
-          PR=$(gh pr create -B master --title 'Automatic sync from rustc' --body '')
+          PR=$(gh pr create -B master --title 'Automatic Rustup' --body '')
           ~/.local/bin/zulip-send --user $ZULIP_BOT_EMAIL --api-key $ZULIP_API_TOKEN --site https://rust-lang.zulipchat.com \
             --stream miri --subject "Cron Job Failure (miri, $(date -u +%Y-%m))" \
             --message "A PR doing a rustc-pull [has been automatically created]($PR) for your convenience."
diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock
index e654932255ae6..62370206956c6 100644
--- a/src/tools/miri/Cargo.lock
+++ b/src/tools/miri/Cargo.lock
@@ -17,6 +17,17 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
 
+[[package]]
+name = "aes"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2"
+dependencies = [
+ "cfg-if",
+ "cipher",
+ "cpufeatures",
+]
+
 [[package]]
 name = "aho-corasick"
 version = "1.1.1"
@@ -142,6 +153,16 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "cipher"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
+dependencies = [
+ "crypto-common",
+ "inout",
+]
+
 [[package]]
 name = "color-eyre"
 version = "0.6.2"
@@ -199,6 +220,15 @@ dependencies = [
  "windows-sys 0.45.0",
 ]
 
+[[package]]
+name = "cpufeatures"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.8"
@@ -218,6 +248,16 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
 [[package]]
 name = "ctrlc"
 version = "3.4.1"
@@ -284,6 +324,16 @@ version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
 
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.2.10"
@@ -332,6 +382,15 @@ dependencies = [
  "unicode-width",
 ]
 
+[[package]]
+name = "inout"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
+dependencies = [
+ "generic-array",
+]
+
 [[package]]
 name = "instant"
 version = "0.1.12"
@@ -469,6 +528,7 @@ dependencies = [
 name = "miri"
 version = "0.1.0"
 dependencies = [
+ "aes",
  "colored",
  "ctrlc",
  "env_logger",
@@ -726,9 +786,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.17"
+version = "0.38.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7"
+checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed"
 dependencies = [
  "bitflags 2.4.0",
  "errno",
@@ -909,6 +969,12 @@ dependencies = [
  "tracing-core",
 ]
 
+[[package]]
+name = "typenum"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
+
 [[package]]
 name = "ui_test"
 version = "0.21.2"
@@ -954,6 +1020,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
 
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
 [[package]]
 name = "wasi"
 version = "0.11.0+wasi-snapshot-preview1"
diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml
index c911a153c1373..f8e507a11b022 100644
--- a/src/tools/miri/Cargo.toml
+++ b/src/tools/miri/Cargo.toml
@@ -23,6 +23,7 @@ env_logger = "0.10"
 log = "0.4"
 rand = "0.8"
 smallvec = "1.7"
+aes = { version = "0.8.3", features = ["hazmat"] }
 
 measureme = "10.0.0"
 ctrlc = "3.2.5"
diff --git a/src/tools/miri/cargo-miri/Cargo.lock b/src/tools/miri/cargo-miri/Cargo.lock
index c37a5ca88756f..8f442b3de13a1 100644
--- a/src/tools/miri/cargo-miri/Cargo.lock
+++ b/src/tools/miri/cargo-miri/Cargo.lock
@@ -238,9 +238,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.17"
+version = "0.38.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7"
+checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed"
 dependencies = [
  "bitflags 2.4.0",
  "errno",
diff --git a/src/tools/miri/miri-script/Cargo.lock b/src/tools/miri/miri-script/Cargo.lock
index 6f8dd973fdd91..ea306ed838a1c 100644
--- a/src/tools/miri/miri-script/Cargo.lock
+++ b/src/tools/miri/miri-script/Cargo.lock
@@ -213,9 +213,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.17"
+version = "0.38.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7"
+checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed"
 dependencies = [
  "bitflags 2.4.0",
  "errno",
diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 8f0a0a045abdb..bc3882bcf2b05 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-4ea5190026dbc1302b644d938e68bc6843cb8b24
+249624b5043013d18c00f0401ca431c1a6baa8cd
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
index 32d4d96b069bc..e902939290a0b 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
@@ -206,11 +206,7 @@ trait EvalContextPrivExt<'mir: 'ecx, 'tcx: 'mir, 'ecx>: crate::MiriInterpCxExt<'
         // Make sure the new permission makes sense as the initial permission of a fresh tag.
         assert!(new_perm.initial_state.is_initial());
         // Ensure we bail out if the pointer goes out-of-bounds (see miri#1050).
-        this.check_ptr_access(
-            place.ptr(),
-            ptr_size,
-            CheckInAllocMsg::InboundsTest,
-        )?;
+        this.check_ptr_access(place.ptr(), ptr_size, CheckInAllocMsg::InboundsTest)?;
 
         // It is crucial that this gets called on all code paths, to ensure we track tag creation.
         let log_creation = |this: &MiriInterpCx<'mir, 'tcx>,
diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs
index f3a8f1c25d7b2..bec2972c50d6d 100644
--- a/src/tools/miri/src/concurrency/data_race.rs
+++ b/src/tools/miri/src/concurrency/data_race.rs
@@ -1017,10 +1017,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must
         // be 8-aligned).
         let align = Align::from_bytes(place.layout.size.bytes()).unwrap();
-        this.check_ptr_align(
-            place.ptr(),
-            align,
-        )?;
+        this.check_ptr_align(place.ptr(), align)?;
         // Ensure the allocation is mutable. Even failing (read-only) compare_exchange need mutable
         // memory on many targets (i.e., they segfault if taht memory is mapped read-only), and
         // atomic loads can be implemented via compare_exchange on some targets. There could
diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs
index 690c6619aba81..965cd534d1e5c 100644
--- a/src/tools/miri/src/helpers.rs
+++ b/src/tools/miri/src/helpers.rs
@@ -868,9 +868,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         let size2 = Size::from_bytes(2);
         let this = self.eval_context_mut();
         this.check_ptr_align(ptr, Align::from_bytes(2).unwrap())?;
-        let mut alloc = this
-            .get_ptr_alloc_mut(ptr, size2 * string_length)?
-            .unwrap(); // not a ZST, so we will get a result
+        let mut alloc = this.get_ptr_alloc_mut(ptr, size2 * string_length)?.unwrap(); // not a ZST, so we will get a result
         for (offset, wchar) in wide_str.iter().copied().chain(iter::once(0x0000)).enumerate() {
             let offset = u64::try_from(offset).unwrap();
             alloc.write_scalar(alloc_range(size2 * offset, size2), Scalar::from_u16(wchar))?;
diff --git a/src/tools/miri/src/intptrcast.rs b/src/tools/miri/src/intptrcast.rs
index 154d86375ca2a..ab6a256f71450 100644
--- a/src/tools/miri/src/intptrcast.rs
+++ b/src/tools/miri/src/intptrcast.rs
@@ -26,8 +26,10 @@ pub type GlobalState = RefCell<GlobalStateInner>;
 
 #[derive(Clone, Debug)]
 pub struct GlobalStateInner {
-    /// This is used as a map between the address of each allocation and its `AllocId`.
-    /// It is always sorted
+    /// This is used as a map between the address of each allocation and its `AllocId`. It is always
+    /// sorted. We cannot use a `HashMap` since we can be given an address that is offset from the
+    /// base address, and we need to find the `AllocId` it belongs to.
+    /// This is not the *full* inverse of `base_addr`; dead allocations have been removed.
     int_to_ptr_map: Vec<(u64, AllocId)>,
     /// The base address for each allocation.  We cannot put that into
     /// `AllocExtra` because function pointers also have a base address, and
@@ -62,10 +64,21 @@ impl GlobalStateInner {
     }
 }
 
-impl<'mir, 'tcx> GlobalStateInner {
+/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
+/// of `align` that is larger or equal to `addr`
+fn align_addr(addr: u64, align: u64) -> u64 {
+    match addr % align {
+        0 => addr,
+        rem => addr.checked_add(align).unwrap() - rem,
+    }
+}
+
+impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     // Returns the exposed `AllocId` that corresponds to the specified addr,
     // or `None` if the addr is out of bounds
-    fn alloc_id_from_addr(ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64) -> Option<AllocId> {
+    fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
+        let ecx = self.eval_context_ref();
         let global_state = ecx.machine.intptrcast.borrow();
         assert!(global_state.provenance_mode != ProvenanceMode::Strict);
 
@@ -82,94 +95,40 @@ impl<'mir, 'tcx> GlobalStateInner {
                 let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
                 // This never overflows because `addr >= glb`
                 let offset = addr - glb;
-                // If the offset exceeds the size of the allocation, don't use this `alloc_id`.
+                // We require this to be strict in-bounds of the allocation. This arm is only
+                // entered for addresses that are not the base address, so even zero-sized
+                // allocations will get recognized at their base address -- but all other
+                // allocations will *not* be recognized at their "end" address.
                 let size = ecx.get_alloc_info(alloc_id).0;
-                if offset <= size.bytes() { Some(alloc_id) } else { None }
+                if offset < size.bytes() { Some(alloc_id) } else { None }
             }
         }?;
 
-        // We only use this provenance if it has been exposed, *and* is still live.
+        // We only use this provenance if it has been exposed.
         if global_state.exposed.contains(&alloc_id) {
-            let (_size, _align, kind) = ecx.get_alloc_info(alloc_id);
-            match kind {
-                AllocKind::LiveData | AllocKind::Function | AllocKind::VTable => {
-                    return Some(alloc_id);
-                }
-                AllocKind::Dead => {}
-            }
-        }
-
-        None
-    }
-
-    pub fn expose_ptr(
-        ecx: &mut MiriInterpCx<'mir, 'tcx>,
-        alloc_id: AllocId,
-        tag: BorTag,
-    ) -> InterpResult<'tcx> {
-        let global_state = ecx.machine.intptrcast.get_mut();
-        // In strict mode, we don't need this, so we can save some cycles by not tracking it.
-        if global_state.provenance_mode != ProvenanceMode::Strict {
-            trace!("Exposing allocation id {alloc_id:?}");
-            global_state.exposed.insert(alloc_id);
-            if ecx.machine.borrow_tracker.is_some() {
-                ecx.expose_tag(alloc_id, tag)?;
-            }
-        }
-        Ok(())
-    }
-
-    pub fn ptr_from_addr_cast(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
-        addr: u64,
-    ) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
-        trace!("Casting {:#x} to a pointer", addr);
-
-        // Potentially emit a warning.
-        let global_state = ecx.machine.intptrcast.borrow();
-        match global_state.provenance_mode {
-            ProvenanceMode::Default => {
-                // The first time this happens at a particular location, print a warning.
-                thread_local! {
-                    // `Span` is non-`Send`, so we use a thread-local instead.
-                    static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
-                }
-                PAST_WARNINGS.with_borrow_mut(|past_warnings| {
-                    let first = past_warnings.is_empty();
-                    if past_warnings.insert(ecx.cur_span()) {
-                        // Newly inserted, so first time we see this span.
-                        ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
-                    }
-                });
-            }
-            ProvenanceMode::Strict => {
-                throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
-            }
-            ProvenanceMode::Permissive => {}
+            // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
+            debug_assert!(!matches!(ecx.get_alloc_info(alloc_id).2, AllocKind::Dead));
+            Some(alloc_id)
+        } else {
+            None
         }
-
-        // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
-        // completely legal to do a cast and then `wrapping_offset` to another allocation and only
-        // *then* do a memory access. So the allocation that the pointer happens to point to on a
-        // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
-        // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
-        // allocation it might be referencing.
-        Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
     }
 
-    fn alloc_base_addr(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
-        alloc_id: AllocId,
-    ) -> InterpResult<'tcx, u64> {
+    fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
+        let ecx = self.eval_context_ref();
         let mut global_state = ecx.machine.intptrcast.borrow_mut();
         let global_state = &mut *global_state;
 
         Ok(match global_state.base_addr.entry(alloc_id) {
             Entry::Occupied(entry) => *entry.get(),
             Entry::Vacant(entry) => {
-                // There is nothing wrong with a raw pointer being cast to an integer only after
-                // it became dangling.  Hence we allow dead allocations.
-                let (size, align, _kind) = ecx.get_alloc_info(alloc_id);
+                let (size, align, kind) = ecx.get_alloc_info(alloc_id);
+                // This is either called immediately after allocation (and then cached), or when
+                // adjusting `tcx` pointers (which never get freed). So assert that we are looking
+                // at a live allocation. This also ensures that we never re-assign an address to an
+                // allocation that previously had an address, but then was freed and the address
+                // information was removed.
+                assert!(!matches!(kind, AllocKind::Dead));
 
                 // This allocation does not have a base address yet, pick one.
                 // Leave some space to the previous allocation, to give it some chance to be less aligned.
@@ -183,7 +142,7 @@ impl<'mir, 'tcx> GlobalStateInner {
                     .next_base_addr
                     .checked_add(slack)
                     .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-                let base_addr = Self::align_addr(base_addr, align.bytes());
+                let base_addr = align_addr(base_addr, align.bytes());
                 entry.insert(base_addr);
                 trace!(
                     "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
@@ -205,6 +164,7 @@ impl<'mir, 'tcx> GlobalStateInner {
                 if global_state.next_base_addr > ecx.target_usize_max() {
                     throw_exhaust!(AddressSpaceFull);
                 }
+                // Also maintain the opposite mapping in `int_to_ptr_map`.
                 // Given that `next_base_addr` increases in each allocation, pushing the
                 // corresponding tuple keeps `int_to_ptr_map` sorted
                 global_state.int_to_ptr_map.push((base_addr, alloc_id));
@@ -213,43 +173,95 @@ impl<'mir, 'tcx> GlobalStateInner {
             }
         })
     }
+}
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
+    fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
+        let ecx = self.eval_context_mut();
+        let global_state = ecx.machine.intptrcast.get_mut();
+        // In strict mode, we don't need this, so we can save some cycles by not tracking it.
+        if global_state.provenance_mode != ProvenanceMode::Strict {
+            trace!("Exposing allocation id {alloc_id:?}");
+            global_state.exposed.insert(alloc_id);
+            if ecx.machine.borrow_tracker.is_some() {
+                ecx.expose_tag(alloc_id, tag)?;
+            }
+        }
+        Ok(())
+    }
+
+    fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
+        trace!("Casting {:#x} to a pointer", addr);
+
+        let ecx = self.eval_context_ref();
+        let global_state = ecx.machine.intptrcast.borrow();
+
+        // Potentially emit a warning.
+        match global_state.provenance_mode {
+            ProvenanceMode::Default => {
+                // The first time this happens at a particular location, print a warning.
+                thread_local! {
+                    // `Span` is non-`Send`, so we use a thread-local instead.
+                    static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
+                }
+                PAST_WARNINGS.with_borrow_mut(|past_warnings| {
+                    let first = past_warnings.is_empty();
+                    if past_warnings.insert(ecx.cur_span()) {
+                        // Newly inserted, so first time we see this span.
+                        ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
+                    }
+                });
+            }
+            ProvenanceMode::Strict => {
+                throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
+            }
+            ProvenanceMode::Permissive => {}
+        }
+
+        // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
+        // completely legal to do a cast and then `wrapping_offset` to another allocation and only
+        // *then* do a memory access. So the allocation that the pointer happens to point to on a
+        // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
+        // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
+        // allocation it might be referencing.
+        Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
+    }
 
     /// Convert a relative (tcx) pointer to a Miri pointer.
-    pub fn ptr_from_rel_ptr(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
+    fn ptr_from_rel_ptr(
+        &self,
         ptr: Pointer<AllocId>,
         tag: BorTag,
     ) -> InterpResult<'tcx, Pointer<Provenance>> {
+        let ecx = self.eval_context_ref();
+
         let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
-        let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id)?;
+        let base_addr = ecx.addr_from_alloc_id(alloc_id)?;
 
         // Add offset with the right kind of pointer-overflowing arithmetic.
         let dl = ecx.data_layout();
         let absolute_addr = dl.overflowing_offset(base_addr, offset.bytes()).0;
-        Ok(Pointer::new(
-            Provenance::Concrete { alloc_id, tag },
-            Size::from_bytes(absolute_addr),
-        ))
+        Ok(Pointer::new(Provenance::Concrete { alloc_id, tag }, Size::from_bytes(absolute_addr)))
     }
 
     /// When a pointer is used for a memory access, this computes where in which allocation the
     /// access is going.
-    pub fn ptr_get_alloc(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
-        ptr: Pointer<Provenance>,
-    ) -> Option<(AllocId, Size)> {
+    fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
+        let ecx = self.eval_context_ref();
+
         let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)
 
         let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
             alloc_id
         } else {
             // A wildcard pointer.
-            GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())?
+            ecx.alloc_id_from_addr(addr.bytes())?
         };
 
         // This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr
-        // must have been called in the past.
-        let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id).unwrap();
+        // must have been called in the past, so we can just look up the address in the map.
+        let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap();
 
         // Wrapping "addr - base_addr"
         #[allow(clippy::cast_possible_wrap)] // we want to wrap here
@@ -259,14 +271,24 @@ impl<'mir, 'tcx> GlobalStateInner {
             Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0),
         ))
     }
+}
 
-    /// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
-    /// of `align` that is larger or equal to `addr`
-    fn align_addr(addr: u64, align: u64) -> u64 {
-        match addr % align {
-            0 => addr,
-            rem => addr.checked_add(align).unwrap() - rem,
-        }
+impl GlobalStateInner {
+    pub fn free_alloc_id(&mut self, dead_id: AllocId) {
+        // We can *not* remove this from `base_addr`, since the interpreter design requires that we
+        // be able to retrieve an AllocId + offset for any memory access *before* we check if the
+        // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
+        // access to determine the allocation ID and offset -- and there can still be pointers with
+        // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
+        // `None` only if the pointer truly has no provenance (this ensures consistent error
+        // messages).
+        // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
+        // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
+        // returns a dead allocation.
+        self.int_to_ptr_map.retain(|&(_, id)| id != dead_id);
+        // We can also remove it from `exposed`, since this allocation can anyway not be returned by
+        // `alloc_id_from_addr` any more.
+        self.exposed.remove(&dead_id);
     }
 }
 
@@ -276,7 +298,7 @@ mod tests {
 
     #[test]
     fn test_align_addr() {
-        assert_eq!(GlobalStateInner::align_addr(37, 4), 40);
-        assert_eq!(GlobalStateInner::align_addr(44, 4), 44);
+        assert_eq!(align_addr(37, 4), 40);
+        assert_eq!(align_addr(44, 4), 44);
     }
 }
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index f1d8ce01bc24c..68b9164dec0dd 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -117,7 +117,7 @@ pub use crate::eval::{
     create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
 };
 pub use crate::helpers::EvalContextExt as _;
-pub use crate::intptrcast::ProvenanceMode;
+pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode};
 pub use crate::machine::{
     AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind,
     PrimitiveLayouts, Provenance, ProvenanceExtra,
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 1073db6f1a0bd..d5775912eabea 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -1006,7 +1006,10 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
     }
 
     #[inline(always)]
-    fn generate_nan<F1: rustc_apfloat::Float + rustc_apfloat::FloatConvert<F2>, F2: rustc_apfloat::Float>(
+    fn generate_nan<
+        F1: rustc_apfloat::Float + rustc_apfloat::FloatConvert<F2>,
+        F2: rustc_apfloat::Float,
+    >(
         ecx: &InterpCx<'mir, 'tcx, Self>,
         inputs: &[F1],
     ) -> F2 {
@@ -1146,7 +1149,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
             // Value does not matter, SB is disabled
             BorTag::default()
         };
-        intptrcast::GlobalStateInner::ptr_from_rel_ptr(ecx, ptr, tag)
+        ecx.ptr_from_rel_ptr(ptr, tag)
     }
 
     /// Called on `usize as ptr` casts.
@@ -1155,7 +1158,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         ecx: &MiriInterpCx<'mir, 'tcx>,
         addr: u64,
     ) -> InterpResult<'tcx, Pointer<Option<Self::Provenance>>> {
-        intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr)
+        ecx.ptr_from_addr_cast(addr)
     }
 
     /// Called on `ptr as usize` casts.
@@ -1166,8 +1169,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         ptr: Pointer<Self::Provenance>,
     ) -> InterpResult<'tcx> {
         match ptr.provenance {
-            Provenance::Concrete { alloc_id, tag } =>
-                intptrcast::GlobalStateInner::expose_ptr(ecx, alloc_id, tag),
+            Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
             Provenance::Wildcard => {
                 // No need to do anything for wildcard pointers as
                 // their provenances have already been previously exposed.
@@ -1188,7 +1190,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         ecx: &MiriInterpCx<'mir, 'tcx>,
         ptr: Pointer<Self::Provenance>,
     ) -> Option<(AllocId, Size, Self::ProvenanceExtra)> {
-        let rel = intptrcast::GlobalStateInner::ptr_get_alloc(ecx, ptr);
+        let rel = ecx.ptr_get_alloc(ptr);
 
         rel.map(|(alloc_id, size)| {
             let tag = match ptr.provenance {
@@ -1260,6 +1262,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         {
             *deallocated_at = Some(machine.current_span());
         }
+        machine.intptrcast.get_mut().free_alloc_id(alloc_id);
         Ok(())
     }
 
diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs
index 0f4be5e154aed..2d5df3037452f 100644
--- a/src/tools/miri/src/shims/foreign_items.rs
+++ b/src/tools/miri/src/shims/foreign_items.rs
@@ -805,12 +805,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 this.ptr_get_alloc_id(ptr_dest)?;
                 this.ptr_get_alloc_id(ptr_src)?;
 
-                this.mem_copy(
-                    ptr_src,
-                    ptr_dest,
-                    Size::from_bytes(n),
-                    true,
-                )?;
+                this.mem_copy(ptr_src, ptr_dest, Size::from_bytes(n), true)?;
                 this.write_pointer(ptr_dest, dest)?;
             }
             "strcpy" => {
@@ -826,12 +821,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 // reason to have `strcpy` destroy pointer provenance.
                 // This reads at least 1 byte, so we are already enforcing that this is a valid pointer.
                 let n = this.read_c_str(ptr_src)?.len().checked_add(1).unwrap();
-                this.mem_copy(
-                    ptr_src,
-                    ptr_dest,
-                    Size::from_bytes(n),
-                    true,
-                )?;
+                this.mem_copy(ptr_src, ptr_dest, Size::from_bytes(n), true)?;
                 this.write_pointer(ptr_dest, dest)?;
             }
 
diff --git a/src/tools/miri/src/shims/unix/fs.rs b/src/tools/miri/src/shims/unix/fs.rs
index b0592b68a9e0e..062623a7f6a60 100644
--- a/src/tools/miri/src/shims/unix/fs.rs
+++ b/src/tools/miri/src/shims/unix/fs.rs
@@ -756,11 +756,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         trace!("Reading from FD {}, size {}", fd, count);
 
         // Check that the *entire* buffer is actually valid memory.
-        this.check_ptr_access(
-            buf,
-            Size::from_bytes(count),
-            CheckInAllocMsg::MemoryAccessTest,
-        )?;
+        this.check_ptr_access(buf, Size::from_bytes(count), CheckInAllocMsg::MemoryAccessTest)?;
 
         // We cap the number of read bytes to the largest value that we are able to fit in both the
         // host's and target's `isize`. This saves us from having to handle overflows later.
@@ -809,11 +805,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         // Isolation check is done via `FileDescriptor` trait.
 
         // Check that the *entire* buffer is actually valid memory.
-        this.check_ptr_access(
-            buf,
-            Size::from_bytes(count),
-            CheckInAllocMsg::MemoryAccessTest,
-        )?;
+        this.check_ptr_access(buf, Size::from_bytes(count), CheckInAllocMsg::MemoryAccessTest)?;
 
         // We cap the number of written bytes to the largest value that we are able to fit in both the
         // host's and target's `isize`. This saves us from having to handle overflows later.
diff --git a/src/tools/miri/src/shims/unix/linux/sync.rs b/src/tools/miri/src/shims/unix/linux/sync.rs
index 17803b52baf00..ff25b8120b1c0 100644
--- a/src/tools/miri/src/shims/unix/linux/sync.rs
+++ b/src/tools/miri/src/shims/unix/linux/sync.rs
@@ -85,10 +85,7 @@ pub fn futex<'tcx>(
                 return Ok(());
             }
 
-            let timeout = this.deref_pointer_as(
-                &args[3],
-                this.libc_ty_layout("timespec"),
-            )?;
+            let timeout = this.deref_pointer_as(&args[3], this.libc_ty_layout("timespec"))?;
             let timeout_time = if this.ptr_is_null(timeout.ptr())? {
                 None
             } else {
diff --git a/src/tools/miri/src/shims/windows/sync.rs b/src/tools/miri/src/shims/windows/sync.rs
index 5e46404e7f13c..2c9603097c857 100644
--- a/src/tools/miri/src/shims/windows/sync.rs
+++ b/src/tools/miri/src/shims/windows/sync.rs
@@ -321,8 +321,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         this.atomic_fence(AtomicFenceOrd::SeqCst)?;
 
         let layout = this.machine.layouts.uint(size).unwrap();
-        let futex_val = this
-            .read_scalar_atomic(&this.ptr_to_mplace(ptr, layout), AtomicReadOrd::Relaxed)?;
+        let futex_val =
+            this.read_scalar_atomic(&this.ptr_to_mplace(ptr, layout), AtomicReadOrd::Relaxed)?;
         let compare_val = this.read_scalar(&this.ptr_to_mplace(compare, layout))?;
 
         if futex_val == compare_val {
diff --git a/src/tools/miri/src/shims/x86/aesni.rs b/src/tools/miri/src/shims/x86/aesni.rs
new file mode 100644
index 0000000000000..aef930595b2d3
--- /dev/null
+++ b/src/tools/miri/src/shims/x86/aesni.rs
@@ -0,0 +1,168 @@
+use rustc_middle::ty::layout::LayoutOf as _;
+use rustc_middle::ty::Ty;
+use rustc_span::Symbol;
+use rustc_target::spec::abi::Abi;
+
+use crate::*;
+use shims::foreign_items::EmulateForeignItemResult;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
+    crate::MiriInterpCxExt<'mir, 'tcx>
+{
+    fn emulate_x86_aesni_intrinsic(
+        &mut self,
+        link_name: Symbol,
+        abi: Abi,
+        args: &[OpTy<'tcx, Provenance>],
+        dest: &PlaceTy<'tcx, Provenance>,
+    ) -> InterpResult<'tcx, EmulateForeignItemResult> {
+        let this = self.eval_context_mut();
+        // Prefix should have already been checked.
+        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.aesni.").unwrap();
+
+        match unprefixed_name {
+            // Used to implement the _mm_aesdec_si128, _mm256_aesdec_epi128
+            // and _mm512_aesdec_epi128 functions.
+            // Performs one round of an AES decryption on each 128-bit word of
+            // `state` with the corresponding 128-bit key of `key`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128
+            "aesdec" | "aesdec.256" | "aesdec.512" => {
+                let [state, key] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                aes_round(this, state, key, dest, |state, key| {
+                    let key = aes::Block::from(key.to_le_bytes());
+                    let mut state = aes::Block::from(state.to_le_bytes());
+                    // `aes::hazmat::equiv_inv_cipher_round` documentation states that
+                    // it performs the same operation as the x86 aesdec instruction.
+                    aes::hazmat::equiv_inv_cipher_round(&mut state, &key);
+                    u128::from_le_bytes(state.into())
+                })?;
+            }
+            // Used to implement the _mm_aesdeclast_si128, _mm256_aesdeclast_epi128
+            // and _mm512_aesdeclast_epi128 functions.
+            // Performs last round of an AES decryption on each 128-bit word of
+            // `state` with the corresponding 128-bit key of `key`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128
+            "aesdeclast" | "aesdeclast.256" | "aesdeclast.512" => {
+                let [state, key] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                aes_round(this, state, key, dest, |state, key| {
+                    let mut state = aes::Block::from(state.to_le_bytes());
+                    // `aes::hazmat::equiv_inv_cipher_round` does the following operations:
+                    // state = InvShiftRows(state)
+                    // state = InvSubBytes(state)
+                    // state = InvMixColumns(state)
+                    // state = state ^ key
+                    // But we need to skip the InvMixColumns.
+                    // First, use a zeroed key to skip the XOR.
+                    aes::hazmat::equiv_inv_cipher_round(&mut state, &aes::Block::from([0; 16]));
+                    // Then, undo the InvMixColumns with MixColumns.
+                    aes::hazmat::mix_columns(&mut state);
+                    // Finally, do the XOR.
+                    u128::from_le_bytes(state.into()) ^ key
+                })?;
+            }
+            // Used to implement the _mm_aesenc_si128, _mm256_aesenc_epi128
+            // and _mm512_aesenc_epi128 functions.
+            // Performs one round of an AES encryption on each 128-bit word of
+            // `state` with the corresponding 128-bit key of `key`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128
+            "aesenc" | "aesenc.256" | "aesenc.512" => {
+                let [state, key] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                aes_round(this, state, key, dest, |state, key| {
+                    let key = aes::Block::from(key.to_le_bytes());
+                    let mut state = aes::Block::from(state.to_le_bytes());
+                    // `aes::hazmat::cipher_round` documentation states that
+                    // it performs the same operation as the x86 aesenc instruction.
+                    aes::hazmat::cipher_round(&mut state, &key);
+                    u128::from_le_bytes(state.into())
+                })?;
+            }
+            // Used to implement the _mm_aesenclast_si128, _mm256_aesenclast_epi128
+            // and _mm512_aesenclast_epi128 functions.
+            // Performs last round of an AES encryption on each 128-bit word of
+            // `state` with the corresponding 128-bit key of `key`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128
+            "aesenclast" | "aesenclast.256" | "aesenclast.512" => {
+                let [state, key] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                aes_round(this, state, key, dest, |state, key| {
+                    let mut state = aes::Block::from(state.to_le_bytes());
+                    // `aes::hazmat::cipher_round` does the following operations:
+                    // state = ShiftRows(state)
+                    // state = SubBytes(state)
+                    // state = MixColumns(state)
+                    // state = state ^ key
+                    // But we need to skip the MixColumns.
+                    // First, use a zeroed key to skip the XOR.
+                    aes::hazmat::cipher_round(&mut state, &aes::Block::from([0; 16]));
+                    // Then, undo the MixColumns with InvMixColumns.
+                    aes::hazmat::inv_mix_columns(&mut state);
+                    // Finally, do the XOR.
+                    u128::from_le_bytes(state.into()) ^ key
+                })?;
+            }
+            // Used to implement the _mm_aesimc_si128 function.
+            // Performs the AES InvMixColumns operation on `op`
+            "aesimc" => {
+                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                // Transmute to `u128`
+                let op = op.transmute(this.machine.layouts.u128, this)?;
+                let dest = dest.transmute(this.machine.layouts.u128, this)?;
+
+                let state = this.read_scalar(&op)?.to_u128()?;
+                let mut state = aes::Block::from(state.to_le_bytes());
+                aes::hazmat::inv_mix_columns(&mut state);
+
+                this.write_scalar(Scalar::from_u128(u128::from_le_bytes(state.into())), &dest)?;
+            }
+            // TODO: Implement the `llvm.x86.aesni.aeskeygenassist` when possible
+            // with an external crate.
+            _ => return Ok(EmulateForeignItemResult::NotSupported),
+        }
+        Ok(EmulateForeignItemResult::NeedsJumping)
+    }
+}
+
+// Performs an AES round (given by `f`) on each 128-bit word of
+// `state` with the corresponding 128-bit key of `key`.
+fn aes_round<'tcx>(
+    this: &mut crate::MiriInterpCx<'_, 'tcx>,
+    state: &OpTy<'tcx, Provenance>,
+    key: &OpTy<'tcx, Provenance>,
+    dest: &PlaceTy<'tcx, Provenance>,
+    f: impl Fn(u128, u128) -> u128,
+) -> InterpResult<'tcx, ()> {
+    assert_eq!(dest.layout.size, state.layout.size);
+    assert_eq!(dest.layout.size, key.layout.size);
+
+    // Transmute arguments to arrays of `u128`.
+    assert_eq!(dest.layout.size.bytes() % 16, 0);
+    let len = dest.layout.size.bytes() / 16;
+
+    let u128_array_layout =
+        this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, len))?;
+
+    let state = state.transmute(u128_array_layout, this)?;
+    let key = key.transmute(u128_array_layout, this)?;
+    let dest = dest.transmute(u128_array_layout, this)?;
+
+    for i in 0..len {
+        let state = this.read_scalar(&this.project_index(&state, i)?)?.to_u128()?;
+        let key = this.read_scalar(&this.project_index(&key, i)?)?.to_u128()?;
+        let dest = this.project_index(&dest, i)?;
+
+        let res = f(state, key);
+
+        this.write_scalar(Scalar::from_u128(res), &dest)?;
+    }
+
+    Ok(())
+}
diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs
index 53a4a1ef28a7f..d88a3127eccc5 100644
--- a/src/tools/miri/src/shims/x86/mod.rs
+++ b/src/tools/miri/src/shims/x86/mod.rs
@@ -7,9 +7,11 @@ use crate::*;
 use helpers::bool_to_simd_element;
 use shims::foreign_items::EmulateForeignItemResult;
 
+mod aesni;
 mod sse;
 mod sse2;
 mod sse3;
+mod sse41;
 mod ssse3;
 
 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
@@ -100,6 +102,17 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     this, link_name, abi, args, dest,
                 );
             }
+            name if name.starts_with("sse41.") => {
+                return sse41::EvalContextExt::emulate_x86_sse41_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
+            name if name.starts_with("aesni.") => {
+                return aesni::EvalContextExt::emulate_x86_aesni_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
+
             _ => return Ok(EmulateForeignItemResult::NotSupported),
         }
         Ok(EmulateForeignItemResult::NeedsJumping)
diff --git a/src/tools/miri/src/shims/x86/sse3.rs b/src/tools/miri/src/shims/x86/sse3.rs
index 252384a0aa9db..246e9e9c6cb28 100644
--- a/src/tools/miri/src/shims/x86/sse3.rs
+++ b/src/tools/miri/src/shims/x86/sse3.rs
@@ -73,12 +73,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                 let src_ptr = this.read_pointer(src_ptr)?;
                 let dest = dest.force_mplace(this)?;
 
-                this.mem_copy(
-                    src_ptr,
-                    dest.ptr(),
-                    dest.layout.size,
-                    /*nonoverlapping*/ true,
-                )?;
+                this.mem_copy(src_ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
             }
             _ => return Ok(EmulateForeignItemResult::NotSupported),
         }
diff --git a/src/tools/miri/src/shims/x86/sse41.rs b/src/tools/miri/src/shims/x86/sse41.rs
new file mode 100644
index 0000000000000..cfa06ded6e681
--- /dev/null
+++ b/src/tools/miri/src/shims/x86/sse41.rs
@@ -0,0 +1,319 @@
+use rustc_middle::mir;
+use rustc_span::Symbol;
+use rustc_target::abi::Size;
+use rustc_target::spec::abi::Abi;
+
+use crate::*;
+use shims::foreign_items::EmulateForeignItemResult;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
+    crate::MiriInterpCxExt<'mir, 'tcx>
+{
+    fn emulate_x86_sse41_intrinsic(
+        &mut self,
+        link_name: Symbol,
+        abi: Abi,
+        args: &[OpTy<'tcx, Provenance>],
+        dest: &PlaceTy<'tcx, Provenance>,
+    ) -> InterpResult<'tcx, EmulateForeignItemResult> {
+        let this = self.eval_context_mut();
+        // Prefix should have already been checked.
+        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse41.").unwrap();
+
+        match unprefixed_name {
+            // Used to implement the _mm_insert_ps function.
+            // Takes one element of `right` and inserts it into `left` and
+            // optionally zero some elements. Source index is specified
+            // in bits `6..=7` of `imm`, destination index is specified in
+            // bits `4..=5` if `imm`, and `i`th bit specifies whether element
+            // `i` is zeroed.
+            "insertps" => {
+                let [left, right, imm] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, left_len);
+                assert_eq!(dest_len, right_len);
+                assert!(dest_len <= 4);
+
+                let imm = this.read_scalar(imm)?.to_u8()?;
+                let src_index = u64::from((imm >> 6) & 0b11);
+                let dst_index = u64::from((imm >> 4) & 0b11);
+
+                let src_value = this.read_immediate(&this.project_index(&right, src_index)?)?;
+
+                for i in 0..dest_len {
+                    let dest = this.project_index(&dest, i)?;
+
+                    if imm & (1 << i) != 0 {
+                        // zeroed
+                        this.write_scalar(Scalar::from_u32(0), &dest)?;
+                    } else if i == dst_index {
+                        // copy from `right` at specified index
+                        this.write_immediate(*src_value, &dest)?;
+                    } else {
+                        // copy from `left`
+                        this.copy_op(
+                            &this.project_index(&left, i)?,
+                            &dest,
+                            /*allow_transmute*/ false,
+                        )?;
+                    }
+                }
+            }
+            // Used to implement the _mm_packus_epi32 function.
+            // Concatenates two 32-bit signed integer vectors and converts
+            // the result to a 16-bit unsigned integer vector with saturation.
+            "packusdw" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(left_len, right_len);
+                assert_eq!(dest_len, left_len.checked_mul(2).unwrap());
+
+                for i in 0..left_len {
+                    let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i32()?;
+                    let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i32()?;
+                    let left_dest = this.project_index(&dest, i)?;
+                    let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;
+
+                    let left_res =
+                        u16::try_from(left).unwrap_or(if left < 0 { 0 } else { u16::MAX });
+                    let right_res =
+                        u16::try_from(right).unwrap_or(if right < 0 { 0 } else { u16::MAX });
+
+                    this.write_scalar(Scalar::from_u16(left_res), &left_dest)?;
+                    this.write_scalar(Scalar::from_u16(right_res), &right_dest)?;
+                }
+            }
+            // Used to implement the _mm_dp_ps and _mm_dp_pd functions.
+            // Conditionally multiplies the packed floating-point elements in
+            // `left` and `right` using the high 4 bits in `imm`, sums the four
+            // products, and conditionally stores the sum in `dest` using the low
+            // 4 bits of `imm`.
+            "dpps" | "dppd" => {
+                let [left, right, imm] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(left_len, right_len);
+                assert!(dest_len <= 4);
+
+                let imm = this.read_scalar(imm)?.to_u8()?;
+
+                let element_layout = left.layout.field(this, 0);
+
+                // Calculate dot product
+                // Elements are floating point numbers, but we can use `from_int`
+                // because the representation of 0.0 is all zero bits.
+                let mut sum = ImmTy::from_int(0u8, element_layout);
+                for i in 0..left_len {
+                    if imm & (1 << i.checked_add(4).unwrap()) != 0 {
+                        let left = this.read_immediate(&this.project_index(&left, i)?)?;
+                        let right = this.read_immediate(&this.project_index(&right, i)?)?;
+
+                        let mul = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
+                        sum = this.wrapping_binary_op(mir::BinOp::Add, &sum, &mul)?;
+                    }
+                }
+
+                // Write to destination (conditioned to imm)
+                for i in 0..dest_len {
+                    let dest = this.project_index(&dest, i)?;
+
+                    if imm & (1 << i) != 0 {
+                        this.write_immediate(*sum, &dest)?;
+                    } else {
+                        this.write_scalar(Scalar::from_int(0u8, element_layout.size), &dest)?;
+                    }
+                }
+            }
+            // Used to implement the _mm_floor_ss, _mm_ceil_ss and _mm_round_ss
+            // functions. Rounds the first element of `right` according to `rounding`
+            // and copies the remaining elements from `left`.
+            "round.ss" => {
+                let [left, right, rounding] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                round_first::<rustc_apfloat::ieee::Single>(this, left, right, rounding, dest)?;
+            }
+            // Used to implement the _mm_floor_sd, _mm_ceil_sd and _mm_round_sd
+            // functions. Rounds the first element of `right` according to `rounding`
+            // and copies the remaining elements from `left`.
+            "round.sd" => {
+                let [left, right, rounding] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                round_first::<rustc_apfloat::ieee::Double>(this, left, right, rounding, dest)?;
+            }
+            // Used to implement the _mm_minpos_epu16 function.
+            // Find the minimum unsinged 16-bit integer in `op` and
+            // returns its value and position.
+            "phminposuw" => {
+                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (op, op_len) = this.operand_to_simd(op)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                // Find minimum
+                let mut min_value = u16::MAX;
+                let mut min_index = 0;
+                for i in 0..op_len {
+                    let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u16()?;
+                    if op < min_value {
+                        min_value = op;
+                        min_index = i;
+                    }
+                }
+
+                // Write value and index
+                this.write_scalar(Scalar::from_u16(min_value), &this.project_index(&dest, 0)?)?;
+                this.write_scalar(
+                    Scalar::from_u16(min_index.try_into().unwrap()),
+                    &this.project_index(&dest, 1)?,
+                )?;
+                // Fill remaining with zeros
+                for i in 2..dest_len {
+                    this.write_scalar(Scalar::from_u16(0), &this.project_index(&dest, i)?)?;
+                }
+            }
+            // Used to implement the _mm_mpsadbw_epu8 function.
+            // Compute the sum of absolute differences of quadruplets of unsigned
+            // 8-bit integers in `left` and `right`, and store the 16-bit results
+            // in `right`. Quadruplets are selected from `left` and `right` with
+            // offsets specified in `imm`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8
+            "mpsadbw" => {
+                let [left, right, imm] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(left_len, right_len);
+                assert_eq!(left_len, dest_len.checked_mul(2).unwrap());
+
+                let imm = this.read_scalar(imm)?.to_u8()?;
+                // Bit 2 of `imm` specifies the offset for indices of `left`.
+                // The offset is 0 when the bit is 0 or 4 when the bit is 1.
+                let left_offset = u64::from((imm >> 2) & 1).checked_mul(4).unwrap();
+                // Bits 0..=1 of `imm` specify the offset for indices of
+                // `right` in blocks of 4 elements.
+                let right_offset = u64::from(imm & 0b11).checked_mul(4).unwrap();
+
+                for i in 0..dest_len {
+                    let left_offset = left_offset.checked_add(i).unwrap();
+                    let mut res: u16 = 0;
+                    for j in 0..4 {
+                        let left = this
+                            .read_scalar(
+                                &this.project_index(&left, left_offset.checked_add(j).unwrap())?,
+                            )?
+                            .to_u8()?;
+                        let right = this
+                            .read_scalar(
+                                &this
+                                    .project_index(&right, right_offset.checked_add(j).unwrap())?,
+                            )?
+                            .to_u8()?;
+                        res = res.checked_add(left.abs_diff(right).into()).unwrap();
+                    }
+                    this.write_scalar(Scalar::from_u16(res), &this.project_index(&dest, i)?)?;
+                }
+            }
+            // Used to implement the _mm_testz_si128, _mm_testc_si128
+            // and _mm_testnzc_si128 functions.
+            // Tests `op & mask == 0`, `op & mask == mask` or
+            // `op & mask != 0 && op & mask != mask`
+            "ptestz" | "ptestc" | "ptestnzc" => {
+                let [op, mask] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (op, op_len) = this.operand_to_simd(op)?;
+                let (mask, mask_len) = this.operand_to_simd(mask)?;
+
+                assert_eq!(op_len, mask_len);
+
+                let f = match unprefixed_name {
+                    "ptestz" => |op, mask| op & mask == 0,
+                    "ptestc" => |op, mask| op & mask == mask,
+                    "ptestnzc" => |op, mask| op & mask != 0 && op & mask != mask,
+                    _ => unreachable!(),
+                };
+
+                let mut all_zero = true;
+                for i in 0..op_len {
+                    let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u64()?;
+                    let mask = this.read_scalar(&this.project_index(&mask, i)?)?.to_u64()?;
+                    all_zero &= f(op, mask);
+                }
+
+                this.write_scalar(Scalar::from_i32(all_zero.into()), dest)?;
+            }
+            _ => return Ok(EmulateForeignItemResult::NotSupported),
+        }
+        Ok(EmulateForeignItemResult::NeedsJumping)
+    }
+}
+
+// Rounds the first element of `right` according to `rounding`
+// and copies the remaining elements from `left`.
+fn round_first<'tcx, F: rustc_apfloat::Float>(
+    this: &mut crate::MiriInterpCx<'_, 'tcx>,
+    left: &OpTy<'tcx, Provenance>,
+    right: &OpTy<'tcx, Provenance>,
+    rounding: &OpTy<'tcx, Provenance>,
+    dest: &PlaceTy<'tcx, Provenance>,
+) -> InterpResult<'tcx, ()> {
+    let (left, left_len) = this.operand_to_simd(left)?;
+    let (right, right_len) = this.operand_to_simd(right)?;
+    let (dest, dest_len) = this.place_to_simd(dest)?;
+
+    assert_eq!(dest_len, left_len);
+    assert_eq!(dest_len, right_len);
+
+    // The fourth bit of `rounding` only affects the SSE status
+    // register, which cannot be accessed from Miri (or from Rust,
+    // for that matter), so we can ignore it.
+    let rounding = match this.read_scalar(rounding)?.to_i32()? & !0b1000 {
+        // When the third bit is 0, the rounding mode is determined by the
+        // first two bits.
+        0b000 => rustc_apfloat::Round::NearestTiesToEven,
+        0b001 => rustc_apfloat::Round::TowardNegative,
+        0b010 => rustc_apfloat::Round::TowardPositive,
+        0b011 => rustc_apfloat::Round::TowardZero,
+        // When the third bit is 1, the rounding mode is determined by the
+        // SSE status register. Since we do not support modifying it from
+        // Miri (or Rust), we assume it to be at its default mode (round-to-nearest).
+        0b100..=0b111 => rustc_apfloat::Round::NearestTiesToEven,
+        rounding => throw_unsup_format!("unsupported rounding mode 0x{rounding:02x}"),
+    };
+
+    let op0: F = this.read_scalar(&this.project_index(&right, 0)?)?.to_float()?;
+    let res = op0.round_to_integral(rounding).value;
+    this.write_scalar(
+        Scalar::from_uint(res.to_bits(), Size::from_bits(F::BITS)),
+        &this.project_index(&dest, 0)?,
+    )?;
+
+    for i in 1..dest_len {
+        this.copy_op(
+            &this.project_index(&left, i)?,
+            &this.project_index(&dest, i)?,
+            /*allow_transmute*/ false,
+        )?;
+    }
+
+    Ok(())
+}
diff --git a/src/tools/miri/tests/fail/dangling_pointers/deref_dangling_box.rs b/src/tools/miri/tests/fail/dangling_pointers/deref_dangling_box.rs
index 0d4506115c7f2..d2823672ade2f 100644
--- a/src/tools/miri/tests/fail/dangling_pointers/deref_dangling_box.rs
+++ b/src/tools/miri/tests/fail/dangling_pointers/deref_dangling_box.rs
@@ -1,7 +1,7 @@
 // Should be caught even without retagging
 //@compile-flags: -Zmiri-disable-stacked-borrows
 #![feature(strict_provenance)]
-use std::ptr::{addr_of_mut, self};
+use std::ptr::{self, addr_of_mut};
 
 // Deref'ing a dangling raw pointer is fine, but for a dangling box it is not.
 // We do this behind a pointer indirection to potentially fool validity checking.
diff --git a/src/tools/miri/tests/fail/dangling_pointers/deref_dangling_ref.rs b/src/tools/miri/tests/fail/dangling_pointers/deref_dangling_ref.rs
index 37da2e96758f7..b62e041d70c6a 100644
--- a/src/tools/miri/tests/fail/dangling_pointers/deref_dangling_ref.rs
+++ b/src/tools/miri/tests/fail/dangling_pointers/deref_dangling_ref.rs
@@ -1,7 +1,7 @@
 // Should be caught even without retagging
 //@compile-flags: -Zmiri-disable-stacked-borrows
 #![feature(strict_provenance)]
-use std::ptr::{addr_of_mut, self};
+use std::ptr::{self, addr_of_mut};
 
 // Deref'ing a dangling raw pointer is fine, but for a dangling reference it is not.
 // We do this behind a pointer indirection to potentially fool validity checking.
diff --git a/src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.rs b/src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.rs
index 9d53faccd1ef1..c1bbc748e1a08 100644
--- a/src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.rs
+++ b/src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.rs
@@ -1,4 +1,6 @@
-//@compile-flags: -Zmiri-tree-borrows
+// This does need an aliasing model.
+//@revisions: stack tree
+//@[tree]compile-flags: -Zmiri-tree-borrows
 #![feature(raw_ref_op)]
 #![feature(core_intrinsics)]
 #![feature(custom_mir)]
@@ -25,6 +27,7 @@ pub fn main() {
 fn myfun(ptr: *mut i32) -> i32 {
     // This overwrites the return place, which shouldn't be possible through another pointer.
     unsafe { ptr.write(0) };
-    //~^ ERROR: /write access .* forbidden/
+    //~[stack]^ ERROR: tag does not exist in the borrow stack
+    //~[tree]| ERROR: /write access .* forbidden/
     13
 }
diff --git a/src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.stack.stderr b/src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.stack.stderr
new file mode 100644
index 0000000000000..0666db34fec4e
--- /dev/null
+++ b/src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.stack.stderr
@@ -0,0 +1,40 @@
+error: Undefined Behavior: attempting a write access using <TAG> at ALLOC[0x0], but that tag does not exist in the borrow stack for this location
+  --> $DIR/return_pointer_aliasing2.rs:LL:CC
+   |
+LL |     unsafe { ptr.write(0) };
+   |              ^^^^^^^^^^^^
+   |              |
+   |              attempting a write access using <TAG> at ALLOC[0x0], but that tag does not exist in the borrow stack for this location
+   |              this error occurs as part of an access at ALLOC[0x0..0x4]
+   |
+   = help: this indicates a potential bug in the program: it performed an invalid operation, but the Stacked Borrows rules it violated are still experimental
+   = help: see https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/stacked-borrows.md for further information
+help: <TAG> was created by a SharedReadWrite retag at offsets [0x0..0x4]
+  --> $DIR/return_pointer_aliasing2.rs:LL:CC
+   |
+LL | /     mir! {
+LL | |         {
+LL | |             let _x = 0;
+LL | |             let ptr = &raw mut _x;
+...  |
+LL | |         }
+LL | |     }
+   | |_____^
+help: <TAG> was later invalidated at offsets [0x0..0x4] by a Unique in-place function argument/return passing protection
+  --> $DIR/return_pointer_aliasing2.rs:LL:CC
+   |
+LL |     unsafe { ptr.write(0) };
+   |     ^^^^^^^^^^^^^^^^^^^^^^^
+   = note: BACKTRACE (of the first span):
+   = note: inside `myfun` at $DIR/return_pointer_aliasing2.rs:LL:CC
+note: inside `main`
+  --> $DIR/return_pointer_aliasing2.rs:LL:CC
+   |
+LL |             Call(_x = myfun(ptr), after_call)
+   |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   = note: this error originates in the macro `::core::intrinsics::mir::__internal_remove_let` which comes from the expansion of the macro `mir` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to previous error
+
diff --git a/src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.stderr b/src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.tree.stderr
similarity index 100%
rename from src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.stderr
rename to src/tools/miri/tests/fail/function_calls/return_pointer_aliasing2.tree.stderr
diff --git a/src/tools/miri/tests/fail/function_calls/return_pointer_on_unwind.rs b/src/tools/miri/tests/fail/function_calls/return_pointer_on_unwind.rs
new file mode 100644
index 0000000000000..79e29b79d6a25
--- /dev/null
+++ b/src/tools/miri/tests/fail/function_calls/return_pointer_on_unwind.rs
@@ -0,0 +1,55 @@
+// Doesn't need an aliasing model.
+//@compile-flags: -Zmiri-disable-stacked-borrows
+#![feature(raw_ref_op)]
+#![feature(core_intrinsics)]
+#![feature(custom_mir)]
+
+use std::intrinsics::mir::*;
+use std::panic;
+
+#[repr(C)]
+struct S(i32, [u8; 128]);
+
+#[custom_mir(dialect = "runtime", phase = "optimized")]
+fn docall(out: &mut S) {
+    mir! {
+        {
+            Call(*out = callee(), after_call)
+        }
+
+        after_call = {
+            Return()
+        }
+    }
+}
+
+fn startpanic() -> () {
+    panic!()
+}
+
+#[custom_mir(dialect = "runtime", phase = "optimized")]
+fn callee() -> S {
+    mir! {
+        type RET = S;
+        let _unit: ();
+        {
+            // We test whether changes done to RET before unwinding
+            // become visible to the outside. In codegen we can see them
+            // but Miri should detect this as UB!
+            RET.0 = 42;
+            Call(_unit = startpanic(), after_call)
+        }
+
+        after_call = {
+            Return()
+        }
+    }
+}
+
+fn main() {
+    let mut x = S(0, [0; 128]);
+    panic::catch_unwind(panic::AssertUnwindSafe(|| docall(&mut x))).unwrap_err();
+    // The return place got de-initialized before the call and assigning to RET
+    // does not propagate if we do not reach the `Return`.
+    dbg!(x.0); //~ERROR: uninitialized
+}
diff --git a/src/tools/miri/tests/fail/function_calls/return_pointer_on_unwind.stderr b/src/tools/miri/tests/fail/function_calls/return_pointer_on_unwind.stderr
new file mode 100644
index 0000000000000..ecd9a111840a8
--- /dev/null
+++ b/src/tools/miri/tests/fail/function_calls/return_pointer_on_unwind.stderr
@@ -0,0 +1,19 @@
+thread 'main' panicked at $DIR/return_pointer_on_unwind.rs:LL:CC:
+explicit panic
+note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
+error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
+  --> $DIR/return_pointer_on_unwind.rs:LL:CC
+   |
+LL |     dbg!(x.0);
+   |     ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE:
+   = note: inside `main` at RUSTLIB/std/src/macros.rs:LL:CC
+   = note: this error originates in the macro `dbg` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to previous error
+
diff --git a/src/tools/miri/tests/pass/calls.rs b/src/tools/miri/tests/pass/calls.rs
index 014d1d3acab7f..8db3d3590cc1e 100644
--- a/src/tools/miri/tests/pass/calls.rs
+++ b/src/tools/miri/tests/pass/calls.rs
@@ -34,10 +34,26 @@ fn const_fn_call() -> i64 {
     x
 }
 
+fn call_return_into_passed_reference() {
+    pub fn func<T>(v: &mut T, f: fn(&T) -> T) {
+        // MIR building will introduce a temporary, so this becomes
+        // `let temp = f(v); *v = temp;`.
+        // If this got optimized to `*v = f(v)` on the MIR level we'd have UB
+        // since the return place may not be observed while the function runs!
+        *v = f(v);
+    }
+
+    let mut x = 0;
+    func(&mut x, |v| v + 1);
+    assert_eq!(x, 1);
+}
+
 fn main() {
     assert_eq!(call(), 2);
     assert_eq!(factorial_recursive(), 3628800);
     assert_eq!(call_generic(), (42, true));
     assert_eq!(cross_crate_fn_call(), 1);
     assert_eq!(const_fn_call(), 11);
+
+    call_return_into_passed_reference();
 }
diff --git a/src/tools/miri/tests/pass/float_nan.rs b/src/tools/miri/tests/pass/float_nan.rs
index 698aa447e266a..9b0a40c41b9cb 100644
--- a/src/tools/miri/tests/pass/float_nan.rs
+++ b/src/tools/miri/tests/pass/float_nan.rs
@@ -345,10 +345,7 @@ fn test_casts() {
     );
     // Check that the low bits are gone (not the high bits).
     check_all_outcomes(
-        HashSet::from_iter([
-            F32::nan(Pos, Quiet, 0),
-            F32::nan(Neg, Quiet, 0),
-        ]),
+        HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
         || F32::from(F64::nan(Pos, Quiet, 1).as_f64() as f32),
     );
     check_all_outcomes(
@@ -358,7 +355,7 @@ fn test_casts() {
             F32::nan(Pos, Quiet, 1),
             F32::nan(Neg, Quiet, 1),
         ]),
-        || F32::from(F64::nan(Pos, Quiet, 1 << (51-22)).as_f64() as f32),
+        || F32::from(F64::nan(Pos, Quiet, 1 << (51 - 22)).as_f64() as f32),
     );
     check_all_outcomes(
         HashSet::from_iter([
diff --git a/src/tools/miri/tests/pass/intrinsics-x86-aes-vaes.rs b/src/tools/miri/tests/pass/intrinsics-x86-aes-vaes.rs
new file mode 100644
index 0000000000000..090b1db0af0c2
--- /dev/null
+++ b/src/tools/miri/tests/pass/intrinsics-x86-aes-vaes.rs
@@ -0,0 +1,291 @@
+// Ignore everything except x86 and x86_64
+// Any additional target are added to CI should be ignored here
+// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
+//@ignore-target-aarch64
+//@ignore-target-arm
+//@ignore-target-avr
+//@ignore-target-s390x
+//@ignore-target-thumbv7em
+//@ignore-target-wasm32
+//@compile-flags: -C target-feature=+aes,+vaes,+avx512f
+
+#![feature(avx512_target_feature, stdsimd)]
+
+use core::mem::transmute;
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+
+fn main() {
+    assert!(is_x86_feature_detected!("aes"));
+    assert!(is_x86_feature_detected!("vaes"));
+    assert!(is_x86_feature_detected!("avx512f"));
+
+    unsafe {
+        test_aes();
+        test_vaes();
+    }
+}
+
+// The constants in the tests below are just bit patterns. They should not
+// be interpreted as integers; signedness does not make sense for them, but
+// __m128i happens to be defined in terms of signed integers.
+#[allow(overflowing_literals)]
+#[target_feature(enable = "aes")]
+unsafe fn test_aes() {
+    // Mostly copied from library/stdarch/crates/core_arch/src/x86/aes.rs
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesdec_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
+        let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
+        let r = _mm_aesdec_si128(a, k);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesdec_si128();
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesdeclast_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
+        let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
+        let r = _mm_aesdeclast_si128(a, k);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesdeclast_si128();
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesenc_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
+        let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
+        let r = _mm_aesenc_si128(a, k);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesenc_si128();
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesenclast_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
+        let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
+        let r = _mm_aesenclast_si128(a, k);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesenclast_si128();
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesimc_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714195.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let e = _mm_set_epi64x(0xc66c82284ee40aa0, 0x6633441122770055);
+        let r = _mm_aesimc_si128(a);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesimc_si128();
+}
+
+// The constants in the tests below are just bit patterns. They should not
+// be interpreted as integers; signedness does not make sense for them, but
+// __m128i happens to be defined in terms of signed integers.
+#[allow(overflowing_literals)]
+#[target_feature(enable = "vaes,avx512f")]
+unsafe fn test_vaes() {
+    #[target_feature(enable = "avx")]
+    unsafe fn get_a256() -> __m256i {
+        // Constants are random
+        _mm256_set_epi64x(
+            0xb89f43a558d3cd51,
+            0x57b3e81e369bd603,
+            0xf177a1a626933fd6,
+            0x50d8adbed1a2f9d7,
+        )
+    }
+    #[target_feature(enable = "avx")]
+    unsafe fn get_k256() -> __m256i {
+        // Constants are random
+        _mm256_set_epi64x(
+            0x503ff704588b5627,
+            0xe23d882ed9c3c146,
+            0x2785e5b670155b3c,
+            0xa750718e183549ff,
+        )
+    }
+
+    #[target_feature(enable = "vaes")]
+    unsafe fn test_mm256_aesdec_epi128() {
+        let a = get_a256();
+        let k = get_k256();
+        let r = _mm256_aesdec_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 2] = transmute(a);
+        let k: [u128; 2] = transmute(k);
+        let r: [u128; 2] = transmute(r);
+        for i in 0..2 {
+            let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm256_aesdec_epi128();
+
+    #[target_feature(enable = "vaes")]
+    unsafe fn test_mm256_aesdeclast_epi128() {
+        let a = get_a256();
+        let k = get_k256();
+        let r = _mm256_aesdeclast_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 2] = transmute(a);
+        let k: [u128; 2] = transmute(k);
+        let r: [u128; 2] = transmute(r);
+        for i in 0..2 {
+            let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm256_aesdeclast_epi128();
+
+    #[target_feature(enable = "vaes")]
+    unsafe fn test_mm256_aesenc_epi128() {
+        let a = get_a256();
+        let k = get_k256();
+        let r = _mm256_aesenc_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 2] = transmute(a);
+        let k: [u128; 2] = transmute(k);
+        let r: [u128; 2] = transmute(r);
+        for i in 0..2 {
+            let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm256_aesenc_epi128();
+
+    #[target_feature(enable = "vaes")]
+    unsafe fn test_mm256_aesenclast_epi128() {
+        let a = get_a256();
+        let k = get_k256();
+        let r = _mm256_aesenclast_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 2] = transmute(a);
+        let k: [u128; 2] = transmute(k);
+        let r: [u128; 2] = transmute(r);
+        for i in 0..2 {
+            let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm256_aesenclast_epi128();
+
+    #[target_feature(enable = "avx512f")]
+    unsafe fn get_a512() -> __m512i {
+        // Constants are random
+        _mm512_set_epi64(
+            0xb89f43a558d3cd51,
+            0x57b3e81e369bd603,
+            0xf177a1a626933fd6,
+            0x50d8adbed1a2f9d7,
+            0xfbfee3116629db78,
+            0x6aef4a91f2ad50f4,
+            0x4258bb51ff1d476d,
+            0x31da65761c8016cf,
+        )
+    }
+    #[target_feature(enable = "avx512f")]
+    unsafe fn get_k512() -> __m512i {
+        // Constants are random
+        _mm512_set_epi64(
+            0x503ff704588b5627,
+            0xe23d882ed9c3c146,
+            0x2785e5b670155b3c,
+            0xa750718e183549ff,
+            0xdfb408830a65d3d9,
+            0x0de3d92adac81b0a,
+            0xed2741fe12877cae,
+            0x3251ddb5404e0974,
+        )
+    }
+
+    #[target_feature(enable = "vaes,avx512f")]
+    unsafe fn test_mm512_aesdec_epi128() {
+        let a = get_a512();
+        let k = get_k512();
+        let r = _mm512_aesdec_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 4] = transmute(a);
+        let k: [u128; 4] = transmute(k);
+        let r: [u128; 4] = transmute(r);
+        for i in 0..4 {
+            let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm512_aesdec_epi128();
+
+    #[target_feature(enable = "vaes,avx512f")]
+    unsafe fn test_mm512_aesdeclast_epi128() {
+        let a = get_a512();
+        let k = get_k512();
+        let r = _mm512_aesdeclast_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 4] = transmute(a);
+        let k: [u128; 4] = transmute(k);
+        let r: [u128; 4] = transmute(r);
+        for i in 0..4 {
+            let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm512_aesdeclast_epi128();
+
+    #[target_feature(enable = "vaes,avx512f")]
+    unsafe fn test_mm512_aesenc_epi128() {
+        let a = get_a512();
+        let k = get_k512();
+        let r = _mm512_aesenc_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 4] = transmute(a);
+        let k: [u128; 4] = transmute(k);
+        let r: [u128; 4] = transmute(r);
+        for i in 0..4 {
+            let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm512_aesenc_epi128();
+
+    #[target_feature(enable = "vaes,avx512f")]
+    unsafe fn test_mm512_aesenclast_epi128() {
+        let a = get_a512();
+        let k = get_k512();
+        let r = _mm512_aesenclast_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 4] = transmute(a);
+        let k: [u128; 4] = transmute(k);
+        let r: [u128; 4] = transmute(r);
+        for i in 0..4 {
+            let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm512_aesenclast_epi128();
+}
+
+#[track_caller]
+#[target_feature(enable = "sse2")]
+unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
+    assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
+}
diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse41.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse41.rs
new file mode 100644
index 0000000000000..d5489ffaf4ba1
--- /dev/null
+++ b/src/tools/miri/tests/pass/intrinsics-x86-sse41.rs
@@ -0,0 +1,315 @@
+// Ignore everything except x86 and x86_64
+// Any additional target are added to CI should be ignored here
+// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
+//@ignore-target-aarch64
+//@ignore-target-arm
+//@ignore-target-avr
+//@ignore-target-s390x
+//@ignore-target-thumbv7em
+//@ignore-target-wasm32
+//@compile-flags: -C target-feature=+sse4.1
+
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+use std::mem::transmute;
+
+fn main() {
+    assert!(is_x86_feature_detected!("sse4.1"));
+
+    unsafe {
+        test_sse41();
+    }
+}
+
+#[target_feature(enable = "sse4.1")]
+unsafe fn test_sse41() {
+    // Mostly copied from library/stdarch/crates/core_arch/src/x86/sse41.rs
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_insert_ps() {
+        let a = _mm_set1_ps(1.0);
+        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let r = _mm_insert_ps::<0b11_00_1100>(a, b);
+        let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
+        assert_eq_m128(r, e);
+
+        // Zeroing takes precedence over copied value
+        let a = _mm_set1_ps(1.0);
+        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let r = _mm_insert_ps::<0b11_00_0001>(a, b);
+        let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
+        assert_eq_m128(r, e);
+    }
+    test_mm_insert_ps();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_packus_epi32() {
+        let a = _mm_setr_epi32(1, 2, 3, 4);
+        let b = _mm_setr_epi32(-1, -2, -3, -4);
+        let r = _mm_packus_epi32(a, b);
+        let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_packus_epi32();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_dp_pd() {
+        let a = _mm_setr_pd(2.0, 3.0);
+        let b = _mm_setr_pd(1.0, 4.0);
+        let e = _mm_setr_pd(14.0, 0.0);
+        assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
+    }
+    test_mm_dp_pd();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_dp_ps() {
+        let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
+        let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
+        let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
+        assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
+    }
+    test_mm_dp_ps();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_floor_sd() {
+        let a = _mm_setr_pd(2.5, 4.5);
+        let b = _mm_setr_pd(-1.5, -3.5);
+        let r = _mm_floor_sd(a, b);
+        let e = _mm_setr_pd(-2.0, 4.5);
+        assert_eq_m128d(r, e);
+    }
+    test_mm_floor_sd();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_floor_ss() {
+        let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
+        let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
+        let r = _mm_floor_ss(a, b);
+        let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
+        assert_eq_m128(r, e);
+    }
+    test_mm_floor_ss();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_ceil_sd() {
+        let a = _mm_setr_pd(1.5, 3.5);
+        let b = _mm_setr_pd(-2.5, -4.5);
+        let r = _mm_ceil_sd(a, b);
+        let e = _mm_setr_pd(-2.0, 3.5);
+        assert_eq_m128d(r, e);
+    }
+    test_mm_ceil_sd();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_ceil_ss() {
+        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
+        let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
+        let r = _mm_ceil_ss(a, b);
+        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
+        assert_eq_m128(r, e);
+    }
+    test_mm_ceil_ss();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_round_sd() {
+        let a = _mm_setr_pd(1.5, 3.5);
+        let b = _mm_setr_pd(-2.5, -4.5);
+        let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
+        let e = _mm_setr_pd(-2.0, 3.5);
+        assert_eq_m128d(r, e);
+
+        let a = _mm_setr_pd(1.5, 3.5);
+        let b = _mm_setr_pd(-2.5, -4.5);
+        let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
+        let e = _mm_setr_pd(-3.0, 3.5);
+        assert_eq_m128d(r, e);
+
+        let a = _mm_setr_pd(1.5, 3.5);
+        let b = _mm_setr_pd(-2.5, -4.5);
+        let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
+        let e = _mm_setr_pd(-2.0, 3.5);
+        assert_eq_m128d(r, e);
+
+        let a = _mm_setr_pd(1.5, 3.5);
+        let b = _mm_setr_pd(-2.5, -4.5);
+        let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
+        let e = _mm_setr_pd(-2.0, 3.5);
+        assert_eq_m128d(r, e);
+
+        // Assume round-to-nearest by default
+        let a = _mm_setr_pd(1.5, 3.5);
+        let b = _mm_setr_pd(-2.5, -4.5);
+        let r = _mm_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
+        let e = _mm_setr_pd(-2.0, 3.5);
+        assert_eq_m128d(r, e);
+    }
+    test_mm_round_sd();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_round_ss() {
+        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
+        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
+        let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
+        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
+        assert_eq_m128(r, e);
+
+        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
+        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
+        let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
+        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
+        assert_eq_m128(r, e);
+
+        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
+        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
+        let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
+        let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
+        assert_eq_m128(r, e);
+
+        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
+        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
+        let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
+        let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
+        assert_eq_m128(r, e);
+
+        // Assume round-to-nearest by default
+        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
+        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
+        let r = _mm_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
+        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
+        assert_eq_m128(r, e);
+    }
+    test_mm_round_ss();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_minpos_epu16() {
+        let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
+        let r = _mm_minpos_epu16(a);
+        let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
+        assert_eq_m128i(r, e);
+
+        let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
+        let r = _mm_minpos_epu16(a);
+        let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq_m128i(r, e);
+
+        // Case where the minimum value is repeated
+        let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
+        let r = _mm_minpos_epu16(a);
+        let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_minpos_epu16();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_mpsadbw_epu8() {
+        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+        let r = _mm_mpsadbw_epu8::<0b000>(a, a);
+        let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
+        assert_eq_m128i(r, e);
+
+        let r = _mm_mpsadbw_epu8::<0b001>(a, a);
+        let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
+        assert_eq_m128i(r, e);
+
+        let r = _mm_mpsadbw_epu8::<0b100>(a, a);
+        let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
+        assert_eq_m128i(r, e);
+
+        let r = _mm_mpsadbw_epu8::<0b101>(a, a);
+        let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
+        assert_eq_m128i(r, e);
+
+        let r = _mm_mpsadbw_epu8::<0b111>(a, a);
+        let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_mpsadbw_epu8();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_testz_si128() {
+        let a = _mm_set1_epi8(1);
+        let mask = _mm_set1_epi8(0);
+        let r = _mm_testz_si128(a, mask);
+        assert_eq!(r, 1);
+
+        let a = _mm_set1_epi8(0b101);
+        let mask = _mm_set1_epi8(0b110);
+        let r = _mm_testz_si128(a, mask);
+        assert_eq!(r, 0);
+
+        let a = _mm_set1_epi8(0b011);
+        let mask = _mm_set1_epi8(0b100);
+        let r = _mm_testz_si128(a, mask);
+        assert_eq!(r, 1);
+    }
+    test_mm_testz_si128();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_testc_si128() {
+        let a = _mm_set1_epi8(-1);
+        let mask = _mm_set1_epi8(0);
+        let r = _mm_testc_si128(a, mask);
+        assert_eq!(r, 1);
+
+        let a = _mm_set1_epi8(0b101);
+        let mask = _mm_set1_epi8(0b110);
+        let r = _mm_testc_si128(a, mask);
+        assert_eq!(r, 0);
+
+        let a = _mm_set1_epi8(0b101);
+        let mask = _mm_set1_epi8(0b100);
+        let r = _mm_testc_si128(a, mask);
+        assert_eq!(r, 1);
+    }
+    test_mm_testc_si128();
+
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn test_mm_testnzc_si128() {
+        let a = _mm_set1_epi8(0);
+        let mask = _mm_set1_epi8(1);
+        let r = _mm_testnzc_si128(a, mask);
+        assert_eq!(r, 0);
+
+        let a = _mm_set1_epi8(-1);
+        let mask = _mm_set1_epi8(0);
+        let r = _mm_testnzc_si128(a, mask);
+        assert_eq!(r, 0);
+
+        let a = _mm_set1_epi8(0b101);
+        let mask = _mm_set1_epi8(0b110);
+        let r = _mm_testnzc_si128(a, mask);
+        assert_eq!(r, 1);
+
+        let a = _mm_set1_epi8(0b101);
+        let mask = _mm_set1_epi8(0b101);
+        let r = _mm_testnzc_si128(a, mask);
+        assert_eq!(r, 0);
+    }
+    test_mm_testnzc_si128();
+}
+
+#[track_caller]
+#[target_feature(enable = "sse")]
+unsafe fn assert_eq_m128(a: __m128, b: __m128) {
+    let r = _mm_cmpeq_ps(a, b);
+    if _mm_movemask_ps(r) != 0b1111 {
+        panic!("{:?} != {:?}", a, b);
+    }
+}
+
+#[track_caller]
+#[target_feature(enable = "sse2")]
+pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
+    if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
+        panic!("{:?} != {:?}", a, b);
+    }
+}
+
+#[track_caller]
+#[target_feature(enable = "sse2")]
+pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
+    assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
+}
diff --git a/src/tools/miri/tests/pass/panic/catch_panic.rs b/src/tools/miri/tests/pass/panic/catch_panic.rs
index e4a7f8e3481eb..f5b4eaf685da8 100644
--- a/src/tools/miri/tests/pass/panic/catch_panic.rs
+++ b/src/tools/miri/tests/pass/panic/catch_panic.rs
@@ -5,6 +5,7 @@
 
 use std::cell::Cell;
 use std::panic::{catch_unwind, AssertUnwindSafe};
+use std::process;
 
 thread_local! {
     static MY_COUNTER: Cell<usize> = Cell::new(0);
@@ -62,26 +63,26 @@ fn main() {
     // Built-in panics; also make sure the message is right.
     test(Some("index out of bounds: the len is 3 but the index is 4"), |_old_val| {
         let _val = [0, 1, 2][4];
-        loop {}
+        process::abort()
     });
     test(Some("attempt to divide by zero"), |_old_val| {
         let _val = 1 / 0;
-        loop {}
+        process::abort()
     });
 
     test(Some("align_offset: align is not a power-of-two"), |_old_val| {
         let _ = std::ptr::null::<u8>().align_offset(3);
-        loop {}
+        process::abort()
     });
 
     // Assertion and debug assertion
     test(None, |_old_val| {
         assert!(false);
-        loop {}
+        process::abort()
     });
     test(None, |_old_val| {
         debug_assert!(false);
-        loop {}
+        process::abort()
     });
 
     eprintln!("Success!"); // Make sure we get this in stderr
diff --git a/src/tools/miri/tests/pass/ptr_raw.rs b/src/tools/miri/tests/pass/ptr_raw.rs
index 9743278961b71..11c3455a9ca51 100644
--- a/src/tools/miri/tests/pass/ptr_raw.rs
+++ b/src/tools/miri/tests/pass/ptr_raw.rs
@@ -1,6 +1,6 @@
 #![feature(strict_provenance)]
-use std::ptr::{self, addr_of};
 use std::mem;
+use std::ptr::{self, addr_of};
 
 fn basic_raw() {
     let mut x = 12;
diff --git a/src/tools/miri/triagebot.toml b/src/tools/miri/triagebot.toml
index 1c520a9c77926..3b767b3e62f13 100644
--- a/src/tools/miri/triagebot.toml
+++ b/src/tools/miri/triagebot.toml
@@ -10,6 +10,5 @@ allow-unauthenticated = [
 # Gives us the commands 'ready', 'author', 'blocked'
 [shortcut]
 
-# disabled until https://github.com/rust-lang/triagebot/pull/1720 lands
-#[no-merges]
-#exclude_titles = ["Rollup of", "sync from rustc"]
+[no-merges]
+exclude_titles = ["Rustup"]