From 31725724da037ba67b3d7e7109131972274a5454 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Sat, 2 May 2026 23:44:03 -0700 Subject: [PATCH 1/2] Add some basic tests for `{size,align}_of_val` --- .../intrinsics/size_and_align_of_val.rs | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tests/codegen-llvm/intrinsics/size_and_align_of_val.rs diff --git a/tests/codegen-llvm/intrinsics/size_and_align_of_val.rs b/tests/codegen-llvm/intrinsics/size_and_align_of_val.rs new file mode 100644 index 0000000000000..813a4ae1479ce --- /dev/null +++ b/tests/codegen-llvm/intrinsics/size_and_align_of_val.rs @@ -0,0 +1,53 @@ +//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes -Z mir-opt-level=0 +//@ only-64bit (so I don't need to worry about usize) + +#![crate_type = "lib"] +#![feature(core_intrinsics)] + +// Here to have unit tests of what they actually emit and to track things like +// + +use std::intrinsics::{align_of_val, size_of_val}; + +// CHECK-LABEL: @align_of_array( +#[no_mangle] +pub unsafe fn align_of_array(x: &[u16; 7]) -> usize { + // CHECK: start: + // CHECK: %0 = alloca [8 x i8] + // CHECK: store i64 2, ptr %0 + // CHECK: [[R:%.+]] = load i64, ptr %0 + // CHECK: ret i64 [[R]] + align_of_val(x) +} + +// CHECK-LABEL: @size_of_array( +#[no_mangle] +pub unsafe fn size_of_array(x: &[u16; 7]) -> usize { + // CHECK: %0 = alloca [8 x i8] + // CHECK: store i64 14, ptr %0 + // CHECK: [[R:%.+]] = load i64, ptr %0 + // CHECK: ret i64 [[R]] + size_of_val(x) +} + +// CHECK-LABEL: @align_of_slice( +#[no_mangle] +pub unsafe fn align_of_slice(x: &[u16]) -> usize { + // CHECK: %0 = alloca [8 x i8] + // CHECK: [[SIZE:%.+]] = mul nuw nsw i64 %x.1, 2 + // CHECK: store i64 2, ptr %0 + // CHECK: [[R:%.+]] = load i64, ptr %0 + // CHECK: ret i64 [[R]] + align_of_val(x) +} + +// CHECK-LABEL: @size_of_slice( +#[no_mangle] +pub unsafe fn size_of_slice(x: &[u16]) -> usize { + // CHECK: %0 = alloca [8 x i8] + // CHECK: [[SIZE:%.+]] = mul nuw nsw i64 %x.1, 2 + // CHECK: store i64 [[SIZE]], ptr %0 + // CHECK: [[R:%.+]] = load i64, ptr %0 + // CHECK: ret i64 [[R]] + size_of_val(x) +} From 298d38f6bed70a9d869380f33165f3fdb35edbe5 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Sun, 3 May 2026 12:01:22 -0700 Subject: [PATCH 2/2] Let intrinsics use the SSA operand path --- compiler/rustc_codegen_gcc/src/context.rs | 6 +- .../rustc_codegen_gcc/src/intrinsic/mod.rs | 34 +-- .../rustc_codegen_gcc/src/intrinsic/simd.rs | 14 +- compiler/rustc_codegen_llvm/src/context.rs | 13 +- compiler/rustc_codegen_llvm/src/intrinsic.rs | 135 ++++++---- compiler/rustc_codegen_ssa/src/mir/analyze.rs | 20 +- compiler/rustc_codegen_ssa/src/mir/block.rs | 98 +++++--- .../rustc_codegen_ssa/src/mir/intrinsic.rs | 237 +++++++++--------- compiler/rustc_codegen_ssa/src/mir/mod.rs | 24 ++ compiler/rustc_codegen_ssa/src/mir/operand.rs | 1 + .../rustc_codegen_ssa/src/traits/intrinsic.rs | 17 +- compiler/rustc_codegen_ssa/src/traits/misc.rs | 7 + tests/codegen-llvm/array-equality.rs | 4 +- tests/codegen-llvm/atomicptr.rs | 9 +- .../codegen-llvm/dst-vtable-align-nonzero.rs | 4 +- tests/codegen-llvm/dst-vtable-size-range.rs | 4 +- .../codegen-llvm/intrinsics/disjoint_bitor.rs | 8 +- .../intrinsics/size_and_align_of_val.rs | 27 +- .../simd-intrinsic-generic-bitmask.rs | 12 +- .../simd-intrinsic-mask-reduce.rs | 24 +- tests/codegen-llvm/simd/aggregate-simd.rs | 4 +- 21 files changed, 417 insertions(+), 285 deletions(-) diff --git a/compiler/rustc_codegen_gcc/src/context.rs b/compiler/rustc_codegen_gcc/src/context.rs index 
e0810a35b040b..ed313859aeafa 100644 --- a/compiler/rustc_codegen_gcc/src/context.rs +++ b/compiler/rustc_codegen_gcc/src/context.rs @@ -19,7 +19,7 @@ use rustc_middle::ty::{self, ExistentialTraitRef, Instance, Ty, TyCtxt}; use rustc_session::Session; #[cfg(feature = "master")] use rustc_session::config::DebugInfo; -use rustc_span::{DUMMY_SP, Span, respan}; +use rustc_span::{DUMMY_SP, Span, Symbol, respan}; use rustc_target::spec::{HasTargetSpec, HasX86AbiOpt, Target, TlsModel, X86Abi}; #[cfg(feature = "master")] @@ -497,6 +497,10 @@ impl<'gcc, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'gcc, 'tcx> { None } } + + fn intrinsic_call_expects_place_always(&self, _name: Symbol) -> bool { + true + } } impl<'gcc, 'tcx> HasTyCtxt<'tcx> for CodegenCx<'gcc, 'tcx> { diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs index d823e209fd7d9..827ee1644e7ae 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs @@ -10,6 +10,7 @@ use rustc_codegen_ssa::MemFlags; use rustc_codegen_ssa::base::wants_msvc_seh; use rustc_codegen_ssa::common::IntPredicate; use rustc_codegen_ssa::errors::InvalidMonomorphization; +use rustc_codegen_ssa::mir::IntrinsicResult; use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue}; use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue}; #[cfg(feature = "master")] @@ -194,11 +195,14 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc &mut self, instance: Instance<'tcx>, args: &[OperandRef<'tcx, RValue<'gcc>>], - result: PlaceRef<'tcx, RValue<'gcc>>, + result_layout: ty::layout::TyAndLayout<'tcx>, + result_place: Option>>, span: Span, - ) -> Result<(), Instance<'tcx>> { + ) -> IntrinsicResult<'tcx, RValue<'gcc>> { let tcx = self.tcx; + let result = PlaceRef { val: result_place.unwrap(), layout: result_layout }; + let name = tcx.item_name(instance.def_id()); let name_str = name.as_str(); let fn_args = instance.args; @@ -353,7 +357,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc args[2].immediate(), result, ); - return Ok(()); + return IntrinsicResult::WroteIntoPlace; } sym::breakpoint => { unimplemented!(); @@ -375,12 +379,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc sym::volatile_store => { let dst = args[0].deref(self.cx()); args[1].val.volatile_store(self, dst); - return Ok(()); + return IntrinsicResult::WroteIntoPlace; } sym::unaligned_volatile_store => { let dst = args[0].deref(self.cx()); args[1].val.unaligned_volatile_store(self, dst); - return Ok(()); + return IntrinsicResult::WroteIntoPlace; } sym::prefetch_read_data | sym::prefetch_write_data @@ -448,12 +452,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc _ => bug!(), }, None => { - tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType { + let err = tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty: args[0].layout.ty, }); - return Ok(()); + return IntrinsicResult::Err(err); } } } @@ -544,7 +548,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc extended_asm.set_volatile_flag(true); // We have copied the value to `result` already. 
- return Ok(()); + return IntrinsicResult::WroteIntoPlace; } sym::ptr_mask => { @@ -569,12 +573,15 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc span, ) { Ok(value) => value, - Err(()) => return Ok(()), + Err(err) => return IntrinsicResult::Err(err), } } // Fall back to default body - _ => return Err(Instance::new_raw(instance.def_id(), instance.args)), + _ => { + let fallback = Instance::new_raw(instance.def_id(), instance.args); + return IntrinsicResult::Fallback(fallback); + } }; if result.layout.ty.is_bool() { @@ -583,7 +590,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc } else if !result.layout.ty.is_unit() { self.store_to_place(value, result.val); } - Ok(()) + IntrinsicResult::WroteIntoPlace } fn codegen_llvm_intrinsic_call( @@ -694,13 +701,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc self.context.new_rvalue_from_int(self.int_type, 0) } - fn va_start(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> { + fn va_start(&mut self, _va_list: RValue<'gcc>) { unimplemented!(); } - fn va_end(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> { + fn va_end(&mut self, _va_list: RValue<'gcc>) { // FIXME(antoyo): implement. - self.context.new_rvalue_from_int(self.int_type, 0) } } diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs index a32592b45e5ea..97ec6106e278a 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs @@ -17,7 +17,7 @@ use rustc_hir as hir; use rustc_middle::mir::BinOp; use rustc_middle::ty::layout::HasTyCtxt; use rustc_middle::ty::{self, Ty, Unnormalized}; -use rustc_span::{Span, Symbol, sym}; +use rustc_span::{ErrorGuaranteed, Span, Symbol, sym}; use crate::builder::Builder; #[cfg(not(feature = "master"))] @@ -32,12 +32,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( ret_ty: Ty<'tcx>, llret_ty: Type<'gcc>, span: Span, -) -> Result, ()> { +) -> Result, ErrorGuaranteed> { // macros for error handling: macro_rules! return_error { ($err:expr) => {{ - bx.tcx.dcx().emit_err($err); - return Err(()); + let err = bx.tcx.dcx().emit_err($err); + return Err(err); }}; } macro_rules! require { @@ -809,11 +809,11 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( bx: &mut Builder<'_, 'gcc, 'tcx>, span: Span, args: &[OperandRef<'tcx, RValue<'gcc>>], - ) -> Result, ()> { + ) -> Result, ErrorGuaranteed> { macro_rules! 
return_error { ($err:expr) => {{ - bx.tcx.dcx().emit_err($err); - return Err(()); + let err = bx.tcx.dcx().emit_err($err); + return Err(err); }}; } let ty::Float(ref f) = *in_elem.kind() else { diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 5b730b820b84a..f0d9e48e41968 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -25,7 +25,7 @@ use rustc_session::Session; use rustc_session::config::{ BranchProtection, CFGuard, CFProtection, CrateType, DebugInfo, FunctionReturn, PAuthKey, PacRet, }; -use rustc_span::{DUMMY_SP, Span, Spanned, Symbol}; +use rustc_span::{DUMMY_SP, Span, Spanned, Symbol, sym}; use rustc_symbol_mangling::mangle_internal_symbol; use rustc_target::spec::{ Arch, CfgAbi, Env, HasTargetSpec, Os, RelocModel, SmallDataThresholdSupport, Target, TlsModel, @@ -937,6 +937,17 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { None } } + + fn intrinsic_call_expects_place_always(&self, name: Symbol) -> bool { + matches!( + name, + sym::autodiff + | sym::catch_unwind + | sym::volatile_load + | sym::unaligned_volatile_load + | sym::black_box + ) + } } impl<'ll> CodegenCx<'ll, '_> { diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 84c1e8e6f3d47..820cb7f5eb3e8 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -9,6 +9,7 @@ use rustc_abi::{ use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh}; use rustc_codegen_ssa::common::{IntPredicate, TypeKind}; use rustc_codegen_ssa::errors::{ExpectedPointerMutability, InvalidMonomorphization}; +use rustc_codegen_ssa::mir::IntrinsicResult; use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue}; use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue}; use rustc_codegen_ssa::traits::*; @@ -25,7 +26,7 @@ use rustc_middle::{bug, span_bug}; use rustc_session::config::CrateType; use rustc_session::errors::feature_err; use rustc_session::lint::builtin::DEPRECATED_LLVM_INTRINSIC; -use rustc_span::{Span, Symbol, sym}; +use rustc_span::{ErrorGuaranteed, Span, Symbol, sym}; use rustc_symbol_mangling::{mangle_internal_symbol, symbol_name_for_instance_in_crate}; use rustc_target::callconv::PassMode; use rustc_target::spec::{Arch, Os}; @@ -175,9 +176,10 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { &mut self, instance: ty::Instance<'tcx>, args: &[OperandRef<'tcx, &'ll Value>], - result: PlaceRef<'tcx, &'ll Value>, + result_layout: ty::layout::TyAndLayout<'tcx>, + result_place: Option>, span: Span, - ) -> Result<(), ty::Instance<'tcx>> { + ) -> IntrinsicResult<'tcx, &'ll Value> { let tcx = self.tcx; let llvm_version = crate::llvm_util::get_version(); @@ -222,8 +224,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { ) } sym::autodiff => { + let result = PlaceRef { + val: result_place.unwrap(), + layout: result_layout, + }; codegen_autodiff(self, tcx, instance, args, result); - return Ok(()); + return IntrinsicResult::WroteIntoPlace; } sym::offload => { if tcx.sess.opts.unstable_opts.offload.is_empty() { @@ -235,7 +241,8 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { } codegen_offload(self, tcx, instance, args); - return Ok(()); + // offload *has* a return type, but somehow works without mentioning the place + return IntrinsicResult::WroteIntoPlace; } 
sym::is_val_statically_known => { if let OperandValue::Immediate(imm) = args[0].val { @@ -264,8 +271,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { let ptr = select(self, true_val.llval, false_val.llval); let selected = OperandValue::Ref(PlaceValue::new_sized(ptr, true_val.align)); + let result = PlaceRef { + val: result_place.unwrap(), + layout: result_layout, + }; selected.store(self, result); - return Ok(()); + return IntrinsicResult::WroteIntoPlace; } (OperandValue::Immediate(_), OperandValue::Immediate(_)) | (OperandValue::Pair(_, _), OperandValue::Pair(_, _)) => { @@ -273,11 +284,15 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { let false_val = args[2].immediate_or_packed_pair(self); select(self, true_val, false_val) } - (OperandValue::ZeroSized, OperandValue::ZeroSized) => return Ok(()), + (OperandValue::ZeroSized, OperandValue::ZeroSized) => return IntrinsicResult::Operand(OperandValue::ZeroSized), _ => span_bug!(span, "Incompatible OperandValue for select_unpredictable"), } } sym::catch_unwind => { + let result = PlaceRef { + val: result_place.unwrap(), + layout: result_layout, + }; catch_unwind_intrinsic( self, args[0].immediate(), @@ -285,7 +300,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { args[2].immediate(), result, ); - return Ok(()); + return IntrinsicResult::WroteIntoPlace; } sym::breakpoint => self.call_intrinsic("llvm.debugtrap", &[], &[]), sym::va_arg => { @@ -299,7 +314,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { feature_err(&*self.sess(), feature, span, msg).emit(); } - let BackendRepr::Scalar(scalar) = result.layout.backend_repr else { + let BackendRepr::Scalar(scalar) = result_layout.backend_repr else { bug!("the va_arg intrinsic does not support non-scalar types") }; @@ -316,7 +331,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { bug!("the va_arg intrinsic does not support `i128`/`u128`") } Primitive::Int(..) 
=> { - let int_width = self.cx().size_of(result.layout.ty).bits(); + let int_width = self.cx().size_of(result_layout.ty).bits(); let target_c_int_width = self.cx().sess().target.options.c_int_width; if int_width < u64::from(target_c_int_width) { // Smaller integer types are automatically promoted and `va_arg` @@ -346,34 +361,39 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { } } - emit_va_arg(self, args[0], result.layout.ty) + emit_va_arg(self, args[0], result_layout.ty) } sym::volatile_load | sym::unaligned_volatile_load => { + let result = PlaceRef { + val: result_place.unwrap(), + layout: result_layout, + }; + let ptr = args[0].immediate(); - let load = self.volatile_load(result.layout.llvm_type(self), ptr); + let load = self.volatile_load(result_layout.llvm_type(self), ptr); let align = if name == sym::unaligned_volatile_load { 1 } else { - result.layout.align.bytes() as u32 + result_layout.align.bytes() as u32 }; unsafe { llvm::LLVMSetAlignment(load, align); } - if !result.layout.is_zst() { + if !result_layout.is_zst() { self.store_to_place(load, result.val); } - return Ok(()); + return IntrinsicResult::WroteIntoPlace; } sym::volatile_store => { let dst = args[0].deref(self.cx()); args[1].val.volatile_store(self, dst); - return Ok(()); + return IntrinsicResult::Operand(OperandValue::ZeroSized); } sym::unaligned_volatile_store => { let dst = args[0].deref(self.cx()); args[1].val.unaligned_volatile_store(self, dst); - return Ok(()); + return IntrinsicResult::Operand(OperandValue::ZeroSized); } sym::prefetch_read_data | sym::prefetch_write_data @@ -397,7 +417,8 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { self.const_i32(locality), self.const_i32(cache_type), ], - ) + ); + return IntrinsicResult::Operand(OperandValue::ZeroSized); } sym::carrying_mul_add => { let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx); @@ -435,12 +456,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { sym::carryless_mul if llvm_version >= (22, 0, 0) => { let ty = args[0].layout.ty; if !ty.is_integral() { - tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType { + let err = tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty, }); - return Ok(()); + return IntrinsicResult::Err(err); } let (size, _) = ty.int_size_and_signed(self.tcx); let width = size.bits(); @@ -464,12 +485,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { | sym::unchecked_funnel_shr => { let ty = args[0].layout.ty; if !ty.is_integral() { - tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType { + let err = tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty, }); - return Ok(()); + return IntrinsicResult::Err(err); } let (size, signed) = ty.int_size_and_signed(self.tcx); let width = size.bits(); @@ -485,12 +506,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { }; let ret = self.call_intrinsic(llvm_name, &[llty], &[args[0].immediate(), y]); - self.intcast(ret, result.layout.llvm_type(self), false) + self.intcast(ret, result_layout.llvm_type(self), false) } sym::ctpop => { let ret = self.call_intrinsic("llvm.ctpop", &[llty], &[args[0].immediate()]); - self.intcast(ret, result.layout.llvm_type(self), false) + self.intcast(ret, result_layout.llvm_type(self), false) } sym::bswap => { if width == 8 { @@ -552,12 +573,12 @@
Scalar(_) | ScalarPair(_, _) => true, SimdVector { .. } => false, SimdScalableVector { .. } => { - tcx.dcx().emit_err(InvalidMonomorphization::NonScalableType { + let err = tcx.dcx().emit_err(InvalidMonomorphization::NonScalableType { span, name: sym::raw_eq, ty: tp_ty, }); - return Ok(()); + return IntrinsicResult::Err(err); } Memory { .. } => { // For rusty ABIs, small aggregates are actually passed @@ -595,6 +616,10 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { } sym::black_box => { + let result = PlaceRef { + val: result_place.unwrap(), + layout: result_layout, + }; args[0].val.store(self, result); let result_val_span = [result.val.llval]; // We need to "use" the argument in some way LLVM can't introspect, and on @@ -629,7 +654,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { .unwrap_or_else(|| bug!("failed to generate inline asm call for `black_box`")); // We have copied the value to `result` already. - return Ok(()); + return IntrinsicResult::WroteIntoPlace; } sym::gpu_launch_sized_workgroup_mem => { @@ -653,7 +678,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { self.type_array(self.type_i8(), 0), AddressSpace::GPU_WORKGROUP, ); - let ty::RawPtr(inner_ty, _) = result.layout.ty.kind() else { unreachable!() }; + let ty::RawPtr(inner_ty, _) = result_layout.ty.kind() else { unreachable!() }; // The alignment of the global is used to specify the *minimum* alignment that // must be obeyed by the GPU runtime. // When multiple of these global variables are used by a kernel, the maximum alignment is taken. @@ -817,10 +842,10 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { ); } - let llret_ty = if result.layout.ty.is_simd() - && let BackendRepr::Memory { .. } = result.layout.backend_repr + let llret_ty = if result_layout.ty.is_simd() + && let BackendRepr::Memory { .. } = result_layout.backend_repr { - let (size, elem_ty) = result.layout.ty.simd_size_and_type(self.tcx()); + let (size, elem_ty) = result_layout.ty.simd_size_and_type(self.tcx()); let elem_ll_ty = match elem_ty.kind() { ty::Float(f) => self.type_float_from_ty(*f), ty::Int(i) => self.type_int_from_ty(*i), @@ -830,7 +855,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { }; self.type_vector(elem_ll_ty, size) } else { - result.layout.llvm_type(self) + result_layout.llvm_type(self) }; match generic_simd_intrinsic( @@ -838,31 +863,37 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { name, fn_args, &loaded_args, - result.layout.ty, + result_layout.ty, llret_ty, span, ) { Ok(llval) => llval, // If there was an error, just skip this invocation... we'll abort compilation // anyway, but we can keep codegen'ing to find more errors. - Err(()) => return Ok(()), + Err(err) => return IntrinsicResult::Err(err), } } _ => { debug!("unknown intrinsic '{}' -- falling back to default body", name); // Call the fallback body instead of generating the intrinsic code - return Err(ty::Instance::new_raw(instance.def_id(), instance.args)); + let fallback = ty::Instance::new_raw(instance.def_id(), instance.args); + return IntrinsicResult::Fallback(fallback); } }; - if result.layout.ty.is_bool() { - let val = self.from_immediate(llval); - self.store_to_place(val, result.val); - } else if !result.layout.ty.is_unit() { - self.store_to_place(llval, result.val); + if let BackendRepr::Memory { .. 
} = result_layout.backend_repr { + // We have an llvm immediate, but that's not what cg_ssa expects, + // so write it into the place (that always exists for memory) + if !result_layout.is_zst() { + self.store_to_place(llval, result_place.unwrap()); + } + IntrinsicResult::WroteIntoPlace + } else { + IntrinsicResult::Operand( + OperandRef::from_immediate_or_packed_pair(self, llval, result_layout).val, + ) } - Ok(()) } fn codegen_llvm_intrinsic_call( @@ -1019,12 +1050,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { self.extract_value(type_checked_load, 0) } - fn va_start(&mut self, va_list: &'ll Value) -> &'ll Value { - self.call_intrinsic("llvm.va_start", &[self.val_ty(va_list)], &[va_list]) + fn va_start(&mut self, va_list: &'ll Value) { + self.call_intrinsic("llvm.va_start", &[self.val_ty(va_list)], &[va_list]); } - fn va_end(&mut self, va_list: &'ll Value) -> &'ll Value { - self.call_intrinsic("llvm.va_end", &[self.val_ty(va_list)], &[va_list]) + fn va_end(&mut self, va_list: &'ll Value) { + self.call_intrinsic("llvm.va_end", &[self.val_ty(va_list)], &[va_list]); } } @@ -1951,11 +1982,11 @@ fn generic_simd_intrinsic<'ll, 'tcx>( ret_ty: Ty<'tcx>, llret_ty: &'ll Type, span: Span, -) -> Result<&'ll Value, ()> { +) -> Result<&'ll Value, ErrorGuaranteed> { macro_rules! return_error { ($diag: expr) => {{ - bx.sess().dcx().emit_err($diag); - return Err(()); + let err = bx.sess().dcx().emit_err($diag); + return Err(err); }}; } @@ -2402,11 +2433,11 @@ fn generic_simd_intrinsic<'ll, 'tcx>( bx: &mut Builder<'_, 'll, 'tcx>, span: Span, args: &[OperandRef<'tcx, &'ll Value>], - ) -> Result<&'ll Value, ()> { + ) -> Result<&'ll Value, ErrorGuaranteed> { macro_rules! return_error { ($diag: expr) => {{ - bx.sess().dcx().emit_err($diag); - return Err(()); + let err = bx.sess().dcx().emit_err($diag); + return Err(err); }}; } @@ -2992,7 +3023,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>( return match in_elem.kind() { ty::Int(_) | ty::Uint(_) => { let r = bx.$red(input); - Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) }) + Ok(r) } _ => return_error!(InvalidMonomorphization::UnsupportedSymbol { span, diff --git a/compiler/rustc_codegen_ssa/src/mir/analyze.rs b/compiler/rustc_codegen_ssa/src/mir/analyze.rs index de755d5617801..fb5734ba087c6 100644 --- a/compiler/rustc_codegen_ssa/src/mir/analyze.rs +++ b/compiler/rustc_codegen_ssa/src/mir/analyze.rs @@ -7,8 +7,8 @@ use rustc_index::bit_set::DenseBitSet; use rustc_index::{IndexSlice, IndexVec}; use rustc_middle::mir::visit::{MutatingUseContext, NonMutatingUseContext, PlaceContext, Visitor}; use rustc_middle::mir::{self, DefLocation, Location, TerminatorKind, traversal}; -use rustc_middle::ty::layout::LayoutOf; -use rustc_middle::{bug, span_bug}; +use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf}; +use rustc_middle::{bug, span_bug, ty}; use tracing::debug; use super::FunctionCx; @@ -55,7 +55,7 @@ pub(crate) fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( non_ssa_locals } -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] enum LocalKind { ZST, /// A local that requires an alloca. @@ -195,12 +195,20 @@ impl<'a, 'b, 'tcx, Bx: BuilderMethods<'b, 'tcx>> Visitor<'tcx> for LocalAnalyzer match context { PlaceContext::MutatingUse(MutatingUseContext::Call) => { let call = location.block; - let TerminatorKind::Call { target, .. } = - self.fx.mir.basic_blocks[call].terminator().kind + let TerminatorKind::Call { target, func, .. 
} = + &self.fx.mir.basic_blocks[call].terminator().kind else { bug!() }; - self.define(local, DefLocation::CallReturn { call, target }); + let tcx = self.fx.cx.tcx(); + let func_ty = func.ty(&self.fx.mir.local_decls, tcx); + if let ty::FnDef(def_id, _args) = *func_ty.kind() + && let Some(intrinsic) = tcx.intrinsic(def_id) + && self.fx.cx.intrinsic_call_expects_place_always(intrinsic.name) + { + self.locals[local] = LocalKind::Memory; + } + self.define(local, DefLocation::CallReturn { call, target: *target }); } PlaceContext::NonUse(_) diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs index f4e08e08ef8db..50c20b6984e7a 100644 --- a/compiler/rustc_codegen_ssa/src/mir/block.rs +++ b/compiler/rustc_codegen_ssa/src/mir/block.rs @@ -17,12 +17,13 @@ use rustc_target::callconv::{ArgAbi, ArgAttributes, CastTarget, FnAbi, PassMode} use tracing::{debug, info}; use super::operand::OperandRef; -use super::operand::OperandValue::{Immediate, Pair, Ref, ZeroSized}; +use super::operand::OperandValue::{self, Immediate, Pair, Ref, ZeroSized}; use super::place::{PlaceRef, PlaceValue}; use super::{CachedLlbb, FunctionCx, LocalRef}; use crate::base::{self, is_call_from_compiler_builtins_to_upstream_monomorphization}; use crate::common::{self, IntPredicate}; use crate::errors::CompilerBuiltinsCannotCall; +use crate::mir::IntrinsicResult; use crate::traits::*; use crate::{MemFlags, meth}; @@ -944,32 +945,31 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let result_layout = self.cx.layout_of(self.monomorphized_place_ty(destination.as_ref())); - let (result, store_in_local) = if result_layout.is_zst() { - ( - PlaceRef::new_sized(bx.const_undef(bx.type_ptr()), result_layout), - None, - ) - } else if let Some(local) = destination.as_local() { - match self.locals[local] { - LocalRef::Place(dest) => (dest, None), - LocalRef::UnsizedPlace(_) => bug!("return type must be sized"), - LocalRef::PendingOperand => { - // Currently, intrinsics always need a location to store - // the result, so we create a temporary `alloca` for the - // result. - let tmp = PlaceRef::alloca(bx, result_layout); - tmp.storage_live(bx); - (tmp, Some(local)) + let (result_place, store_in_local) = + if let Some(local) = destination.as_local() { + match self.locals[local] { + LocalRef::Place(dest) => (Some(dest.val), None), + LocalRef::UnsizedPlace(_) => bug!("return type must be sized"), + LocalRef::PendingOperand => (None, Some(local)), + LocalRef::Operand(_) => { + if result_layout.is_zst() { + let place = PlaceRef::new_sized( + bx.const_undef(bx.type_ptr()), + result_layout, + ); + (Some(place.val), None) + } else { + bug!("place local already assigned to"); + } + } } - LocalRef::Operand(_) => { - bug!("place local already assigned to"); - } - } - } else { - (self.codegen_place(bx, destination.as_ref()), None) - }; + } else { + (Some(self.codegen_place(bx, destination.as_ref()).val), None) + }; - if result.val.align < result.layout.align.abi { + if let Some(place) = result_place + && place.align < result_layout.align.abi + { // Currently, MIR code generation does not create calls // that store directly to fields of packed structs (in // fact, the calls it creates write only to temps). 
@@ -982,16 +982,36 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let args: Vec<_> = args.iter().map(|arg| self.codegen_operand(bx, &arg.node)).collect(); - match self.codegen_intrinsic_call(bx, instance, &args, result, source_info) - { - Ok(()) => { - if let Some(local) = store_in_local { - let op = bx.load_operand(result); - result.storage_dead(bx); + let intrinsic_result = self.codegen_intrinsic_call( + bx, + instance, + &args, + result_layout, + result_place, + source_info, + ); + + if let IntrinsicResult::Operand(op_val) = intrinsic_result { + match (result_place, store_in_local) { + (None, Some(local)) => { + let op = OperandRef { + val: op_val, + layout: result_layout, + move_annotation: None, + }; self.overwrite_local(local, LocalRef::Operand(op)); self.debug_introduce_local(bx, local); } + (Some(place_val), None) => { + let dest = PlaceRef { val: place_val, layout: result_layout }; + op_val.store(bx, dest); + } + _ => bug!(), + } + } + match intrinsic_result { + IntrinsicResult::Operand(_) | IntrinsicResult::WroteIntoPlace => { return if let Some(target) = target { helper.funclet_br(self, bx, target, mergeable_succ) } else { @@ -999,7 +1019,21 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { MergingSucc::False }; } - Err(instance) => { + IntrinsicResult::Err(_) => { + // Even though we're definitely going to error, we need to initialize + // the local or `maybe_codegen_consume_direct` might ICE later + // when it goes to use the result from this intrinsic. + if let Some(local) = store_in_local { + let op = OperandRef { + val: OperandValue::poison(bx, result_layout), + layout: result_layout, + move_annotation: None, + }; + self.overwrite_local(local, LocalRef::Operand(op)); + } + return MergingSucc::False; + } + IntrinsicResult::Fallback(instance) => { if intrinsic.must_be_overridden { span_bug!( fn_span, diff --git a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs index aa144558211ef..95208b070974b 100644 --- a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs +++ b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs @@ -1,16 +1,17 @@ -use rustc_abi::{Align, WrappingRange}; +use rustc_abi::{Align, FieldIdx, WrappingRange}; use rustc_middle::mir::SourceInfo; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::{bug, span_bug}; use rustc_session::config::OptLevel; -use rustc_span::sym; +use rustc_span::{ErrorGuaranteed, sym}; use rustc_target::spec::Arch; -use super::FunctionCx; -use super::operand::OperandRef; -use super::place::PlaceRef; +use super::operand::{OperandRef, OperandValue}; +use super::place::PlaceValue; +use super::{FunctionCx, IntrinsicResult}; use crate::common::{AtomicRmwBinOp, SynchronizationScope}; use crate::errors::InvalidMonomorphization; +use crate::mir::operand::OperandRefBuilder; use crate::traits::*; use crate::{MemFlags, meth, size_of_val}; @@ -52,15 +53,16 @@ fn memset_intrinsic<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( } impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { - /// In the `Err` case, returns the instance that should be called instead. + /// In the `Fallback` case, returns the instance that should be called instead.
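+    ///
+    /// A sketch of the caller side, simplified from `codegen_call_terminator`
+    /// (illustrative only, not verbatim):
+    ///
+    /// ```ignore (illustrative)
+    /// match self.codegen_intrinsic_call(bx, instance, &args, result_layout, result_place, source_info) {
+    ///     IntrinsicResult::Operand(op_val) => { /* store it, or make it the SSA local */ }
+    ///     IntrinsicResult::WroteIntoPlace => { /* `result_place` already holds the value */ }
+    ///     IntrinsicResult::Fallback(instance) => { /* codegen a call to the default body */ }
+    ///     IntrinsicResult::Err(_) => { /* poison the local; compilation will fail */ }
+    /// }
+    /// ```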
pub fn codegen_intrinsic_call( &mut self, bx: &mut Bx, instance: ty::Instance<'tcx>, args: &[OperandRef<'tcx, Bx::Value>], - result: PlaceRef<'tcx, Bx::Value>, + result_layout: ty::layout::TyAndLayout<'tcx>, + result_place: Option>, source_info: SourceInfo, - ) -> Result<(), ty::Instance<'tcx>> { + ) -> IntrinsicResult<'tcx, Bx::Value> { let span = source_info.span; let name = bx.tcx().item_name(instance.def_id()); @@ -86,19 +88,19 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let x_place = args[0].val.deref(align); let y_place = args[1].val.deref(align); bx.typed_place_swap(x_place, y_place, pointee_layout); - return Ok(()); + return IntrinsicResult::Operand(OperandValue::ZeroSized); } } - let invalid_monomorphization_int_type = |ty| { - bx.tcx().dcx().emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty }); + let invalid_monomorphization_int_type = |ty| -> ErrorGuaranteed { + bx.tcx().dcx().emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty }) }; - let invalid_monomorphization_int_or_ptr_type = |ty| { + let invalid_monomorphization_int_or_ptr_type = |ty| -> ErrorGuaranteed { bx.tcx().dcx().emit_err(InvalidMonomorphization::BasicIntegerOrPtrType { span, name, ty, - }); + }) }; let parse_atomic_ordering = |ord: ty::Value<'tcx>| { @@ -136,32 +138,34 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { } } - let llval = match name { + let op_val: OperandValue<_> = match name { sym::abort => { bx.abort(); - return Ok(()); + OperandValue::ZeroSized } sym::caller_location => { let location = self.get_caller_location(bx, source_info); - location.val.store(bx, result); - return Ok(()); + location.val } // va_end uses the fallback body (a no-op). - sym::va_start => bx.va_start(args[0].immediate()), + sym::va_start => { + bx.va_start(args[0].immediate()); + OperandValue::ZeroSized + } sym::size_of_val => { let tp_ty = fn_args.type_at(0); let (_, meta) = args[0].val.pointer_parts(); let (llsize, _) = size_of_val::size_and_align_of_dst(bx, tp_ty, meta); - llsize + OperandValue::Immediate(llsize) } sym::align_of_val => { let tp_ty = fn_args.type_at(0); let (_, meta) = args[0].val.pointer_parts(); let (_, llalign) = size_of_val::size_and_align_of_dst(bx, tp_ty, meta); - llalign + OperandValue::Immediate(llalign) } sym::vtable_size | sym::vtable_align => { let vtable = args[0].immediate(); @@ -189,14 +193,14 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { } _ => {} } - value + OperandValue::Immediate(value) } sym::arith_offset => { let ty = fn_args.type_at(0); let layout = bx.layout_of(ty); let ptr = args[0].immediate(); let offset = args[1].immediate(); - bx.gep(bx.backend_type(layout), ptr, &[offset]) + OperandValue::Immediate(bx.gep(bx.backend_type(layout), ptr, &[offset])) } sym::copy => { copy_intrinsic( @@ -208,7 +212,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { args[0].immediate(), args[2].immediate(), ); - return Ok(()); + OperandValue::ZeroSized } sym::write_bytes => { memset_intrinsic( @@ -219,7 +223,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { args[1].immediate(), args[2].immediate(), ); - return Ok(()); + OperandValue::ZeroSized } sym::volatile_copy_nonoverlapping_memory => { @@ -232,7 +236,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { args[1].immediate(), args[2].immediate(), ); - return Ok(()); + OperandValue::ZeroSized } sym::volatile_copy_memory => { copy_intrinsic( @@ -244,7 +248,7 @@ 
impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { args[1].immediate(), args[2].immediate(), ); - return Ok(()); + OperandValue::ZeroSized } sym::volatile_set_memory => { memset_intrinsic( @@ -255,60 +259,58 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { args[1].immediate(), args[2].immediate(), ); - return Ok(()); + OperandValue::ZeroSized } sym::volatile_store => { let dst = args[0].deref(bx.cx()); args[1].val.volatile_store(bx, dst); - return Ok(()); + OperandValue::ZeroSized } sym::unaligned_volatile_store => { let dst = args[0].deref(bx.cx()); args[1].val.unaligned_volatile_store(bx, dst); - return Ok(()); + OperandValue::ZeroSized } sym::disjoint_bitor => { let a = args[0].immediate(); let b = args[1].immediate(); - bx.or_disjoint(a, b) + OperandValue::Immediate(bx.or_disjoint(a, b)) } sym::exact_div => { let ty = args[0].layout.ty; match int_type_width_signed(ty, bx.tcx()) { - Some((_width, signed)) => { - if signed { - bx.exactsdiv(args[0].immediate(), args[1].immediate()) - } else { - bx.exactudiv(args[0].immediate(), args[1].immediate()) - } - } + Some((_width, signed)) => OperandValue::Immediate(if signed { + bx.exactsdiv(args[0].immediate(), args[1].immediate()) + } else { + bx.exactudiv(args[0].immediate(), args[1].immediate()) + }), None => { - bx.tcx().dcx().emit_err(InvalidMonomorphization::BasicIntegerType { - span, - name, - ty, - }); - return Ok(()); + let err = bx + .tcx() + .dcx() + .emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty }); + return IntrinsicResult::Err(err); } } } sym::fadd_fast | sym::fsub_fast | sym::fmul_fast | sym::fdiv_fast | sym::frem_fast => { match float_type_width(args[0].layout.ty) { - Some(_width) => match name { + Some(_width) => OperandValue::Immediate(match name { sym::fadd_fast => bx.fadd_fast(args[0].immediate(), args[1].immediate()), sym::fsub_fast => bx.fsub_fast(args[0].immediate(), args[1].immediate()), sym::fmul_fast => bx.fmul_fast(args[0].immediate(), args[1].immediate()), sym::fdiv_fast => bx.fdiv_fast(args[0].immediate(), args[1].immediate()), sym::frem_fast => bx.frem_fast(args[0].immediate(), args[1].immediate()), _ => bug!(), - }, + }), None => { - bx.tcx().dcx().emit_err(InvalidMonomorphization::BasicFloatType { - span, - name, - ty: args[0].layout.ty, - }); - return Ok(()); + let err = + bx.tcx().dcx().emit_err(InvalidMonomorphization::BasicFloatType { + span, + name, + ty: args[0].layout.ty, + }); + return IntrinsicResult::Err(err); } } } @@ -317,7 +319,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { | sym::fmul_algebraic | sym::fdiv_algebraic | sym::frem_algebraic => match float_type_width(args[0].layout.ty) { - Some(_width) => match name { + Some(_width) => OperandValue::Immediate(match name { sym::fadd_algebraic => { bx.fadd_algebraic(args[0].immediate(), args[1].immediate()) } @@ -334,75 +336,77 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { bx.frem_algebraic(args[0].immediate(), args[1].immediate()) } _ => bug!(), - }, + }), None => { - bx.tcx().dcx().emit_err(InvalidMonomorphization::BasicFloatType { + let err = bx.tcx().dcx().emit_err(InvalidMonomorphization::BasicFloatType { span, name, ty: args[0].layout.ty, }); - return Ok(()); + return IntrinsicResult::Err(err); } }, sym::float_to_int_unchecked => { if float_type_width(args[0].layout.ty).is_none() { - bx.tcx().dcx().emit_err(InvalidMonomorphization::FloatToIntUnchecked { - span, - ty: args[0].layout.ty, - }); - return Ok(()); + let err = + 
bx.tcx().dcx().emit_err(InvalidMonomorphization::FloatToIntUnchecked { + span, + ty: args[0].layout.ty, + }); + return IntrinsicResult::Err(err); } - let Some((_width, signed)) = int_type_width_signed(result.layout.ty, bx.tcx()) + let Some((_width, signed)) = int_type_width_signed(result_layout.ty, bx.tcx()) else { - bx.tcx().dcx().emit_err(InvalidMonomorphization::FloatToIntUnchecked { - span, - ty: result.layout.ty, - }); - return Ok(()); + let err = + bx.tcx().dcx().emit_err(InvalidMonomorphization::FloatToIntUnchecked { + span, + ty: result_layout.ty, + }); + return IntrinsicResult::Err(err); }; - if signed { - bx.fptosi(args[0].immediate(), bx.backend_type(result.layout)) + OperandValue::Immediate(if signed { + bx.fptosi(args[0].immediate(), bx.backend_type(result_layout)) } else { - bx.fptoui(args[0].immediate(), bx.backend_type(result.layout)) - } + bx.fptoui(args[0].immediate(), bx.backend_type(result_layout)) + }) } sym::atomic_load => { let ty = fn_args.type_at(0); if !(int_type_width_signed(ty, bx.tcx()).is_some() || ty.is_raw_ptr()) { - invalid_monomorphization_int_or_ptr_type(ty); - return Ok(()); + let err = invalid_monomorphization_int_or_ptr_type(ty); + return IntrinsicResult::Err(err); } let ordering = fn_args.const_at(1).to_value(); let layout = bx.layout_of(ty); let source = args[0].immediate(); - bx.atomic_load( + OperandValue::Immediate(bx.atomic_load( bx.backend_type(layout), source, parse_atomic_ordering(ordering), layout.size, - ) + )) } sym::atomic_store => { let ty = fn_args.type_at(0); if !(int_type_width_signed(ty, bx.tcx()).is_some() || ty.is_raw_ptr()) { - invalid_monomorphization_int_or_ptr_type(ty); - return Ok(()); + let err = invalid_monomorphization_int_or_ptr_type(ty); + return IntrinsicResult::Err(err); } let ordering = fn_args.const_at(1).to_value(); let size = bx.layout_of(ty).size; let val = args[1].immediate(); let ptr = args[0].immediate(); bx.atomic_store(val, ptr, parse_atomic_ordering(ordering), size); - return Ok(()); + OperandValue::ZeroSized } // These are all AtomicRMW ops sym::atomic_cxchg | sym::atomic_cxchgweak => { let ty = fn_args.type_at(0); if !(int_type_width_signed(ty, bx.tcx()).is_some() || ty.is_raw_ptr()) { - invalid_monomorphization_int_or_ptr_type(ty); - return Ok(()); + let err = invalid_monomorphization_int_or_ptr_type(ty); + return IntrinsicResult::Err(err); } let succ_ordering = fn_args.const_at(1).to_value(); let fail_ordering = fn_args.const_at(2).to_value(); @@ -421,12 +425,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let val = bx.from_immediate(val); let success = bx.from_immediate(success); - let dest = result.project_field(bx, 0); - bx.store_to_place(val, dest.val); - let dest = result.project_field(bx, 1); - bx.store_to_place(success, dest.val); - - return Ok(()); + let mut builder = OperandRefBuilder::new(result_layout); + builder.insert_imm(FieldIdx::from_u32(0), val); + builder.insert_imm(FieldIdx::from_u32(1), success); + builder.build(bx.cx()).val } sym::atomic_max | sym::atomic_min => { let atom_op = if name == sym::atomic_max { @@ -440,16 +442,16 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let ordering = fn_args.const_at(1).to_value(); let ptr = args[0].immediate(); let val = args[1].immediate(); - bx.atomic_rmw( + OperandValue::Immediate(bx.atomic_rmw( atom_op, ptr, val, parse_atomic_ordering(ordering), /* ret_ptr */ false, - ) + )) } else { - invalid_monomorphization_int_type(ty); - return Ok(()); + let err = 
invalid_monomorphization_int_type(ty); + return IntrinsicResult::Err(err); } } sym::atomic_umax | sym::atomic_umin => { @@ -464,16 +466,16 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let ordering = fn_args.const_at(1).to_value(); let ptr = args[0].immediate(); let val = args[1].immediate(); - bx.atomic_rmw( + OperandValue::Immediate(bx.atomic_rmw( atom_op, ptr, val, parse_atomic_ordering(ordering), /* ret_ptr */ false, - ) + )) } else { - invalid_monomorphization_int_type(ty); - return Ok(()); + let err = invalid_monomorphization_int_type(ty); + return IntrinsicResult::Err(err); } } sym::atomic_xchg => { @@ -483,16 +485,16 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let ptr = args[0].immediate(); let val = args[1].immediate(); let atomic_op = AtomicRmwBinOp::AtomicXchg; - bx.atomic_rmw( + OperandValue::Immediate(bx.atomic_rmw( atomic_op, ptr, val, parse_atomic_ordering(ordering), /* ret_ptr */ ty.is_raw_ptr(), - ) + )) } else { - invalid_monomorphization_int_or_ptr_type(ty); - return Ok(()); + let err = invalid_monomorphization_int_or_ptr_type(ty); + return IntrinsicResult::Err(err); } } sym::atomic_xadd @@ -524,22 +526,22 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { { let ptr = args[0].immediate(); // of type "pointer to `ty_mem`" let val = args[1].immediate(); // of type `ty_op` - bx.atomic_rmw( + OperandValue::Immediate(bx.atomic_rmw( atom_op, ptr, val, parse_atomic_ordering(ordering), /* ret_ptr */ ty_mem.is_raw_ptr(), - ) + )) } else { - invalid_monomorphization_int_or_ptr_type(ty_mem); - return Ok(()); + let err = invalid_monomorphization_int_or_ptr_type(ty_mem); + return IntrinsicResult::Err(err); } } sym::atomic_fence => { let ordering = fn_args.const_at(0).to_value(); bx.atomic_fence(parse_atomic_ordering(ordering), SynchronizationScope::CrossThread); - return Ok(()); + OperandValue::ZeroSized } sym::atomic_singlethreadfence => { @@ -548,13 +550,13 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { parse_atomic_ordering(ordering), SynchronizationScope::SingleThread, ); - return Ok(()); + OperandValue::ZeroSized } sym::nontemporal_store => { let dst = args[0].deref(bx.cx()); args[1].val.nontemporal_store(bx, dst); - return Ok(()); + OperandValue::ZeroSized } sym::ptr_offset_from | sym::ptr_offset_from_unsigned => { @@ -566,7 +568,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { let a = bx.ptrtoint(a, bx.type_isize()); let b = bx.ptrtoint(b, bx.type_isize()); let pointee_size = bx.const_usize(pointee_size.bytes()); - if name == sym::ptr_offset_from { + OperandValue::Immediate(if name == sym::ptr_offset_from { // This is the same sequence that Clang emits for pointer subtraction. // It can be neither `nsw` nor `nuw` because the input is treated as // unsigned but then the output is treated as signed, so neither works. @@ -578,27 +580,32 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { // so can use `sub nuw` and `udiv exact` instead of dealing in signed. let d = bx.unchecked_usub(a, b); bx.exactudiv(d, pointee_size) - } + }) } sym::cold_path => { // This is a no-op. The intrinsic is just a hint to the optimizer. - return Ok(()); + OperandValue::ZeroSized } _ => { // Need to use backend-specific things in the implementation. 
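+                // Defer to the backend. Only an `Operand` result falls through to
+                // the variant check below; place writes, fallbacks, and errors are
+                // propagated to the caller unchanged.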
- return bx.codegen_intrinsic_call(instance, args, result, span); + let result = + bx.codegen_intrinsic_call(instance, args, result_layout, result_place, span); + if let IntrinsicResult::Operand(op) = result { + op + } else { + return result; + } } }; - if result.layout.ty.is_bool() { - let val = bx.from_immediate(llval); - bx.store_to_place(val, result.val); - } else if !result.layout.ty.is_unit() { - bx.store_to_place(llval, result.val); - } - Ok(()) + debug_assert!( + op_val.is_expected_variant_for_type(bx.cx(), result_layout), + "[{name:?}] Value {op_val:?} is wrong for type {result_layout:?}", + ); + + IntrinsicResult::Operand(op_val) } } diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs index 84013a00d79df..4bcf037ecce07 100644 --- a/compiler/rustc_codegen_ssa/src/mir/mod.rs +++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs @@ -7,6 +7,7 @@ use rustc_middle::mir::{Body, Local, UnwindTerminateReason, traversal}; use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, TyAndLayout}; use rustc_middle::ty::{self, Instance, Ty, TyCtxt, TypeFoldable, TypeVisitableExt}; use rustc_middle::{bug, mir, span_bug}; +use rustc_span::ErrorGuaranteed; use rustc_target::callconv::{FnAbi, PassMode}; use tracing::{debug, instrument}; @@ -157,6 +158,29 @@ enum LocalRef<'tcx, V> { PendingOperand, } +pub enum IntrinsicResult<'tcx, V> { + /// This intrinsic created an operand without using the `result_place` argument. + /// + /// `codegen_call_terminator` will handle writing the result into the place, + /// if doing so is needed. + /// + /// The vast majority of intrinsics can do this, see MCP#970 + Operand(OperandValue), + + /// The intrinsic wrote its result into the `result_place` argument. + /// + /// Most things don't need to do this, but there are some: `volatile_load` + /// of a non-scalar type, for example, has to. + WroteIntoPlace, + + /// Another instance should be called instead. This is used to invoke intrinsic + /// default bodies in case an intrinsic is not implemented by the backend. + Fallback(ty::Instance<'tcx>), + + /// Arguably this shouldn't exist, per MCP#620, but a bunch do it. 
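+    ///
+    /// Typically constructed by forwarding a freshly emitted diagnostic, as the
+    /// backends do throughout this PR (a pattern, not a dedicated API):
+    /// `IntrinsicResult::Err(tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty }))`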
+ Err(ErrorGuaranteed), +} + impl<'tcx, V: CodegenObject> LocalRef<'tcx, V> { fn new_operand(layout: TyAndLayout<'tcx>) -> LocalRef<'tcx, V> { if layout.is_zst() { diff --git a/compiler/rustc_codegen_ssa/src/mir/operand.rs b/compiler/rustc_codegen_ssa/src/mir/operand.rs index e1d1ef858c017..83fce5a5c8deb 100644 --- a/compiler/rustc_codegen_ssa/src/mir/operand.rs +++ b/compiler/rustc_codegen_ssa/src/mir/operand.rs @@ -103,6 +103,7 @@ impl OperandValue { PlaceValue { llval, llextra, align } } + #[must_use] pub(crate) fn is_expected_variant_for_type<'tcx, Cx: LayoutTypeCodegenMethods<'tcx>>( &self, cx: &Cx, diff --git a/compiler/rustc_codegen_ssa/src/traits/intrinsic.rs b/compiler/rustc_codegen_ssa/src/traits/intrinsic.rs index d1e6436f6b1eb..d515d0c775c3d 100644 --- a/compiler/rustc_codegen_ssa/src/traits/intrinsic.rs +++ b/compiler/rustc_codegen_ssa/src/traits/intrinsic.rs @@ -2,8 +2,9 @@ use rustc_middle::ty; use rustc_span::Span; use super::BackendTypes; +use crate::mir::IntrinsicResult; use crate::mir::operand::OperandRef; -use crate::mir::place::PlaceRef; +use crate::mir::place::PlaceValue; pub trait IntrinsicCallBuilderMethods<'tcx>: BackendTypes { /// Higher-level interface to emitting calls to intrinsics @@ -11,9 +12,12 @@ pub trait IntrinsicCallBuilderMethods<'tcx>: BackendTypes { /// Remember to add all intrinsics here, in `compiler/rustc_hir_analysis/src/check/mod.rs`, /// and in `library/core/src/intrinsics.rs`; if you need access to any LLVM intrinsics, /// add them to `compiler/rustc_codegen_llvm/src/context.rs`. - /// Returns `Err` if another instance should be called instead. This is used to invoke + /// Returns `Fallback` if another instance should be called instead. This is used to invoke /// intrinsic default bodies in case an intrinsic is not implemented by the backend. /// + /// The `result_place` will be provided for things that weren't `LocalKind::SSA`. + /// If you need it for more things, see `intrinsic_call_expects_place_always`. + /// /// NOTE: allowed to call [`BuilderMethods::call`] /// /// [`BuilderMethods::call`]: super::builder::BuilderMethods::call @@ -21,9 +25,10 @@ pub trait IntrinsicCallBuilderMethods<'tcx>: BackendTypes { &mut self, instance: ty::Instance<'tcx>, args: &[OperandRef<'tcx, Self::Value>], - result_dest: PlaceRef<'tcx, Self::Value>, + result_layout: ty::layout::TyAndLayout<'tcx>, + result_place: Option>, span: Span, - ) -> Result<(), ty::Instance<'tcx>>; + ) -> IntrinsicResult<'tcx, Self::Value>; fn codegen_llvm_intrinsic_call( &mut self, @@ -45,8 +50,8 @@ pub trait IntrinsicCallBuilderMethods<'tcx>: BackendTypes { ) -> Self::Value; /// Trait method used to inject `va_start` on the "spoofed" `VaList` in /// Rust defined C-variadic functions. - fn va_start(&mut self, val: Self::Value) -> Self::Value; + fn va_start(&mut self, val: Self::Value); /// Trait method used to inject `va_end` on the "spoofed" `VaList` before /// Rust defined C-variadic functions return. 
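+    ///
+    /// Returns `()` as of this change: the previous `Self::Value` result was
+    /// never consumed by callers (cg_gcc, for instance, returned a dummy 0).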
- fn va_end(&mut self, val: Self::Value) -> Self::Value; + fn va_end(&mut self, val: Self::Value); } diff --git a/compiler/rustc_codegen_ssa/src/traits/misc.rs b/compiler/rustc_codegen_ssa/src/traits/misc.rs index 6a0f889833492..92ddc1f347994 100644 --- a/compiler/rustc_codegen_ssa/src/traits/misc.rs +++ b/compiler/rustc_codegen_ssa/src/traits/misc.rs @@ -3,6 +3,7 @@ use std::cell::RefCell; use rustc_data_structures::fx::FxHashMap; use rustc_middle::ty::{self, Instance, Ty}; use rustc_session::Session; +use rustc_span::Symbol; use super::BackendTypes; @@ -26,4 +27,10 @@ pub trait MiscCodegenMethods<'tcx>: BackendTypes { /// Declares the extern "C" main function for the entry point. Returns None if the symbol /// already exists. fn declare_c_main(&self, fn_type: Self::FunctionSignature) -> Option; + + /// Whether `codegen_intrinsic_call` expects to always have a `place_value` + /// when emitting code for the intrinsic `name`. + /// + /// This is discouraged, but here for now to simplify migration to using OperandValues + fn intrinsic_call_expects_place_always(&self, name: Symbol) -> bool; } diff --git a/tests/codegen-llvm/array-equality.rs b/tests/codegen-llvm/array-equality.rs index 8e4c170e4e674..385b7d7803594 100644 --- a/tests/codegen-llvm/array-equality.rs +++ b/tests/codegen-llvm/array-equality.rs @@ -9,8 +9,8 @@ #[no_mangle] pub fn array_eq_value(a: [u16; 3], b: [u16; 3]) -> bool { // CHECK-NEXT: start: - // CHECK-NEXT: %2 = icmp eq i48 %0, %1 - // CHECK-NEXT: ret i1 %2 + // CHECK-NEXT: %_0 = icmp eq i48 %0, %1 + // CHECK-NEXT: ret i1 %_0 a == b } diff --git a/tests/codegen-llvm/atomicptr.rs b/tests/codegen-llvm/atomicptr.rs index 9d5e618fe76f2..9042f71e9442c 100644 --- a/tests/codegen-llvm/atomicptr.rs +++ b/tests/codegen-llvm/atomicptr.rs @@ -19,17 +19,20 @@ pub fn helper(_: usize) {} // CHECK-LABEL: @atomicptr_fetch_byte_add #[no_mangle] pub fn atomicptr_fetch_byte_add(a: &AtomicPtr, v: usize) -> *mut u8 { - // CHECK: llvm.lifetime.start + // CHECK: start // CHECK-NEXT: %[[RET:.*]] = atomicrmw add ptr %{{.*}}, [[USIZE]] %v - // CHECK-NEXT: inttoptr [[USIZE]] %[[RET]] to ptr + // CHECK-NEXT: %[[RETPTR:.*]] = inttoptr [[USIZE]] %[[RET]] to ptr + // CHECK-NEXT: ret ptr %[[RETPTR]] a.fetch_byte_add(v, Relaxed) } // CHECK-LABEL: @atomicptr_swap #[no_mangle] pub fn atomicptr_swap(a: &AtomicPtr, ptr: *mut u8) -> *mut u8 { + // CHECK: start // CHECK-NOT: ptrtoint - // CHECK: atomicrmw xchg ptr %{{.*}}, ptr %{{.*}} monotonic + // CHECK-NEXT: %[[RET:.*]] = atomicrmw xchg ptr %{{.*}}, ptr %{{.*}} monotonic // CHECK-NOT: inttoptr + // CHECK-NEXT: ret ptr %[[RET]] a.swap(ptr, Relaxed) } diff --git a/tests/codegen-llvm/dst-vtable-align-nonzero.rs b/tests/codegen-llvm/dst-vtable-align-nonzero.rs index 2eee91876683c..80bb55c705dd1 100644 --- a/tests/codegen-llvm/dst-vtable-align-nonzero.rs +++ b/tests/codegen-llvm/dst-vtable-align-nonzero.rs @@ -52,7 +52,7 @@ pub fn does_not_eliminate_runtime_check_when_align_2( // CHECK-LABEL: @align_load_from_align_of_val #[no_mangle] pub fn align_load_from_align_of_val(x: &dyn Trait) -> usize { - // CHECK: {{%[0-9]+}} = load [[USIZE]], {{.+}} !range [[RANGE_META]] + // CHECK: {{%_?[0-9]+}} = load [[USIZE]], {{.+}} !range [[RANGE_META]] core::mem::align_of_val(x) } @@ -60,7 +60,7 @@ pub fn align_load_from_align_of_val(x: &dyn Trait) -> usize { #[no_mangle] pub unsafe fn align_load_from_vtable_align_intrinsic(x: &dyn Trait) -> usize { let (data, vtable): (*const (), *const ()) = core::mem::transmute(x); - // CHECK: {{%[0-9]+}} = load [[USIZE]], {{.+}} !range 
[[RANGE_META]] + // CHECK: {{%_?[0-9]+}} = load [[USIZE]], {{.+}} !range [[RANGE_META]] core::intrinsics::vtable_align(vtable) } diff --git a/tests/codegen-llvm/dst-vtable-size-range.rs b/tests/codegen-llvm/dst-vtable-size-range.rs index 670f5e8d553fa..92fd68ece6a08 100644 --- a/tests/codegen-llvm/dst-vtable-size-range.rs +++ b/tests/codegen-llvm/dst-vtable-size-range.rs @@ -20,7 +20,7 @@ pub fn generate_exclusive_bound() -> usize { // CHECK-LABEL: @size_load_from_size_of_val #[no_mangle] pub fn size_load_from_size_of_val(x: &dyn Trait) -> usize { - // CHECK: {{%[0-9]+}} = load [[USIZE]], {{.+}} !range [[RANGE_META:![0-9]+]] + // CHECK: {{%_?[0-9]+}} = load [[USIZE]], {{.+}} !range [[RANGE_META:![0-9]+]] core::mem::size_of_val(x) } @@ -28,7 +28,7 @@ pub fn size_load_from_size_of_val(x: &dyn Trait) -> usize { #[no_mangle] pub unsafe fn size_load_from_vtable_size_intrinsic(x: &dyn Trait) -> usize { let (data, vtable): (*const (), *const ()) = core::mem::transmute(x); - // CHECK: {{%[0-9]+}} = load [[USIZE]], {{.+}} !range [[RANGE_META]] + // CHECK: {{%_?[0-9]+}} = load [[USIZE]], {{.+}} !range [[RANGE_META]] core::intrinsics::vtable_size(vtable) } diff --git a/tests/codegen-llvm/intrinsics/disjoint_bitor.rs b/tests/codegen-llvm/intrinsics/disjoint_bitor.rs index fc45439ee0b95..b9d17bc9281d0 100644 --- a/tests/codegen-llvm/intrinsics/disjoint_bitor.rs +++ b/tests/codegen-llvm/intrinsics/disjoint_bitor.rs @@ -8,14 +8,16 @@ use std::intrinsics::disjoint_bitor; // CHECK-LABEL: @disjoint_bitor_signed #[no_mangle] pub unsafe fn disjoint_bitor_signed(x: i32, y: i32) -> i32 { - // CHECK: or disjoint i32 %x, %y + // CHECK: [[TEMP:%.+]] = or disjoint i32 %x, %y + // CHECK: ret i32 [[TEMP]] disjoint_bitor(x, y) } // CHECK-LABEL: @disjoint_bitor_unsigned #[no_mangle] pub unsafe fn disjoint_bitor_unsigned(x: u64, y: u64) -> u64 { - // CHECK: or disjoint i64 %x, %y + // CHECK: [[TEMP:%.+]] = or disjoint i64 %x, %y + // CHECK: ret i64 [[TEMP]] disjoint_bitor(x, y) } @@ -25,6 +27,6 @@ pub unsafe fn disjoint_bitor_literal() -> u8 { // This is a separate check because even without any passes, // LLVM will fold so it's not an instruction, which can assert in LLVM. 
- // CHECK: store i8 3 + // CHECK: ret i8 3 disjoint_bitor(1, 2) } diff --git a/tests/codegen-llvm/intrinsics/size_and_align_of_val.rs b/tests/codegen-llvm/intrinsics/size_and_align_of_val.rs index 813a4ae1479ce..2ae45aec5a623 100644 --- a/tests/codegen-llvm/intrinsics/size_and_align_of_val.rs +++ b/tests/codegen-llvm/intrinsics/size_and_align_of_val.rs @@ -13,41 +13,32 @@ use std::intrinsics::{align_of_val, size_of_val}; #[no_mangle] pub unsafe fn align_of_array(x: &[u16; 7]) -> usize { // CHECK: start: - // CHECK: %0 = alloca [8 x i8] - // CHECK: store i64 2, ptr %0 - // CHECK: [[R:%.+]] = load i64, ptr %0 - // CHECK: ret i64 [[R]] + // CHECK-NEXT: ret i64 2 align_of_val(x) } // CHECK-LABEL: @size_of_array( #[no_mangle] pub unsafe fn size_of_array(x: &[u16; 7]) -> usize { - // CHECK: %0 = alloca [8 x i8] - // CHECK: store i64 14, ptr %0 - // CHECK: [[R:%.+]] = load i64, ptr %0 - // CHECK: ret i64 [[R]] + // CHECK: start: + // CHECK-NEXT: ret i64 14 size_of_val(x) } // CHECK-LABEL: @align_of_slice( #[no_mangle] pub unsafe fn align_of_slice(x: &[u16]) -> usize { - // CHECK: %0 = alloca [8 x i8] - // CHECK: [[SIZE:%.+]] = mul nuw nsw i64 %x.1, 2 - // CHECK: store i64 2, ptr %0 - // CHECK: [[R:%.+]] = load i64, ptr %0 - // CHECK: ret i64 [[R]] + // CHECK: start: + // CHECK-NEXT: [[SIZE:%.+]] = mul nuw nsw i64 %x.1, 2 + // CHECK-NEXT: ret i64 2 align_of_val(x) } // CHECK-LABEL: @size_of_slice( #[no_mangle] pub unsafe fn size_of_slice(x: &[u16]) -> usize { - // CHECK: %0 = alloca [8 x i8] - // CHECK: [[SIZE:%.+]] = mul nuw nsw i64 %x.1, 2 - // CHECK: store i64 [[SIZE]], ptr %0 - // CHECK: [[R:%.+]] = load i64, ptr %0 - // CHECK: ret i64 [[R]] + // CHECK: start: + // CHECK-NEXT: [[SIZE:%.+]] = mul nuw nsw i64 %x.1, 2 + // CHECK-NEXT: ret i64 [[SIZE]] size_of_val(x) } diff --git a/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-bitmask.rs b/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-bitmask.rs index 294262d81526f..4af0287fbc120 100644 --- a/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-bitmask.rs +++ b/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-generic-bitmask.rs @@ -20,29 +20,29 @@ use std::intrinsics::simd::simd_bitmask; // CHECK-LABEL: @bitmask_int #[no_mangle] pub unsafe fn bitmask_int(x: i32x2) -> u8 { - // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|1}}, {{|splat \(i32 31\)}} + // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|0}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> // CHECK: [[C:%[0-9]+]] = bitcast <2 x i1> [[B]] to i2 - // CHECK: %{{[0-9]+}} = zext i2 [[C]] to i8 + // CHECK: %{{_?[0-9]+}} = zext i2 [[C]] to i8 simd_bitmask(x) } // CHECK-LABEL: @bitmask_uint #[no_mangle] pub unsafe fn bitmask_uint(x: u32x2) -> u8 { - // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|1}}, {{|splat \(i32 31\)}} + // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|0}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> // CHECK: [[C:%[0-9]+]] = bitcast <2 x i1> [[B]] to i2 - // CHECK: %{{[0-9]+}} = zext i2 [[C]] to i8 + // CHECK: %{{_?[0-9]+}} = zext i2 [[C]] to i8 simd_bitmask(x) } // CHECK-LABEL: @bitmask_int16 #[no_mangle] pub unsafe fn bitmask_int16(x: i8x16) -> u16 { - // CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|1|2}}, {{|splat \(i8 7\)}} + // CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|0|1}}, {{|splat \(i8 7\)}} // CHECK: [[B:%[0-9]+]] = trunc <16 x i8> [[A]] to <16 x i1> - // CHECK: %{{[0-9]+}} = bitcast <16 x i1> [[B]] to i16 + // CHECK: %{{_?[0-9]+}} = bitcast <16 x i1> [[B]] to i16 // CHECK-NOT: 
zext simd_bitmask(x) } diff --git a/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-mask-reduce.rs b/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-mask-reduce.rs index 79f00a6ed6032..7521ba1fcb573 100644 --- a/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-mask-reduce.rs +++ b/tests/codegen-llvm/simd-intrinsic/simd-intrinsic-mask-reduce.rs @@ -22,39 +22,39 @@ pub type mask8x16 = Simd; // CHECK-LABEL: @reduce_any_32x2 #[no_mangle] pub unsafe fn reduce_any_32x2(x: mask32x2) -> bool { - // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|1}}, {{|splat \(i32 31\)}} + // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|0}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> - // CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[B]]) - // CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8 + // CHECK: [[C:%_?[0-9]+]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[B]]) + // CHECK: ret i1 [[C]] simd_reduce_any(x) } // CHECK-LABEL: @reduce_all_32x2 #[no_mangle] pub unsafe fn reduce_all_32x2(x: mask32x2) -> bool { - // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|1}}, {{|splat \(i32 31\)}} + // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|0}}, {{|splat \(i32 31\)}} // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> - // CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> [[B]]) - // CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8 + // CHECK: [[C:%_?[0-9]+]] = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> [[B]]) + // CHECK: ret i1 [[C]] simd_reduce_all(x) } // CHECK-LABEL: @reduce_any_8x16 #[no_mangle] pub unsafe fn reduce_any_8x16(x: mask8x16) -> bool { - // CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|1}}, {{|splat \(i8 7\)}} + // CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|0}}, {{|splat \(i8 7\)}} // CHECK: [[B:%[0-9]+]] = trunc <16 x i8> [[A]] to <16 x i1> - // CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[B]]) - // CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8 + // CHECK: [[C:%_?[0-9]+]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[B]]) + // CHECK: ret i1 [[C]] simd_reduce_any(x) } // CHECK-LABEL: @reduce_all_8x16 #[no_mangle] pub unsafe fn reduce_all_8x16(x: mask8x16) -> bool { - // CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|1}}, {{|splat \(i8 7\)}} + // CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|0}}, {{|splat \(i8 7\)}} // CHECK: [[B:%[0-9]+]] = trunc <16 x i8> [[A]] to <16 x i1> - // CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> [[B]]) - // CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8 + // CHECK: [[C:%_?[0-9]+]] = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> [[B]]) + // CHECK: ret i1 [[C]] simd_reduce_all(x) } diff --git a/tests/codegen-llvm/simd/aggregate-simd.rs b/tests/codegen-llvm/simd/aggregate-simd.rs index 57a301d634c81..2a31287fd9aea 100644 --- a/tests/codegen-llvm/simd/aggregate-simd.rs +++ b/tests/codegen-llvm/simd/aggregate-simd.rs @@ -88,11 +88,9 @@ pub fn transparent_simd_aggregate(x: [u32; 4]) -> u32 { // CHECK-LABEL: transparent_simd_aggregate // CHECK-NOT: alloca - // CHECK: %[[RET:.+]] = alloca [4 x i8] - // CHECK-NOT: alloca // CHECK: %a = load <4 x i32>, ptr %x, align 4 // CHECK: %[[TEMP:.+]] = extractelement <4 x i32> %a, i32 1 - // CHECK: store i32 %[[TEMP]], ptr %[[RET]] + // CHECK: ret i32 %[[TEMP]] unsafe { let a = Simd(x);